1
2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_x86_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2015 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 /* Translates x86 code to IR. */
37
38 /* TODO:
39
40 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
41 to ensure a 32-bit value is being written.
42
43 FUCOMI(P): what happens to A and S flags? Currently are forced
44 to zero.
45
46 x87 FP Limitations:
47
48 * all arithmetic done at 64 bits
49
50 * no FP exceptions, except for handling stack over/underflow
51
52 * FP rounding mode observed only for float->int conversions
53 and int->float conversions which could lose accuracy, and
54 for float-to-float rounding. For all other operations,
55 round-to-nearest is used, regardless.
56
57 * some of the FCOM cases could do with testing -- not convinced
58 that the args are the right way round.
59
60 * FSAVE does not re-initialise the FPU; it should do
61
62 * FINIT not only initialises the FPU environment, it also
63 zeroes all the FP registers. It should leave the registers
64 unchanged.
65
66 SAHF should cause eflags[1] == 1, and in fact it produces 0. As
67 per Intel docs this bit has no meaning anyway. Since PUSHF is the
68 only way to observe eflags[1], a proper fix would be to make that
69 bit be set by PUSHF.
70
71 The state of %eflags.AC (alignment check, bit 18) is recorded by
72 the simulation (viz, if you set it with popf then a pushf produces
73 the value you set it to), but it is otherwise ignored. In
74 particular, setting it to 1 does NOT cause alignment checking to
75 happen. Programs that set it to 1 and then rely on the resulting
76 SIGBUSs to inform them of misaligned accesses will not work.
77
78 Implementation of sysenter is necessarily partial. sysenter is a
79 kind of system call entry. When doing a sysenter, the return
80 address is not known -- that is something that is beyond Vex's
81 knowledge. So the generated IR forces a return to the scheduler,
   which can do what it likes to simulate the sysenter, but it MUST
83 set this thread's guest_EIP field with the continuation address
84 before resuming execution. If that doesn't happen, the thread will
85 jump to address zero, which is probably fatal.
86
87 This module uses global variables and so is not MT-safe (if that
88 should ever become relevant).
89
90 The delta values are 32-bit ints, not 64-bit ints. That means
91 this module may not work right if run on a 64-bit host. That should
92 be fixed properly, really -- if anyone ever wants to use Vex to
93 translate x86 code for execution on a 64-bit host.
94
95 casLE (implementation of lock-prefixed insns) and rep-prefixed
96 insns: the side-exit back to the start of the insn is done with
97 Ijk_Boring. This is quite wrong, it should be done with
98 Ijk_NoRedir, since otherwise the side exit, which is intended to
99 restart the instruction for whatever reason, could go somewhere
100 entirely else. Doing it right (with Ijk_NoRedir jumps) would make
101 no-redir jumps performance critical, at least for rep-prefixed
102 instructions, since all iterations thereof would involve such a
103 jump. It's not such a big deal with casLE since the side exit is
104 only taken if the CAS fails, that is, the location is contended,
105 which is relatively unlikely.
106
107 XXXX: Nov 2009: handling of SWP on ARM suffers from the same
108 problem.
109
110 Note also, the test for CAS success vs failure is done using
111 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
112 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
113 shouldn't definedness-check these comparisons. See
114 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
115 background/rationale.
116 */
117
118 /* Performance holes:
119
120 - fcom ; fstsw %ax ; sahf
121 sahf does not update the O flag (sigh) and so O needs to
122 be computed. This is done expensively; it would be better
123 to have a calculate_eflags_o helper.
124
125 - emwarns; some FP codes can generate huge numbers of these
126 if the fpucw is changed in an inner loop. It would be
127 better for the guest state to have an emwarn-enable reg
128 which can be set zero or nonzero. If it is zero, emwarns
129 are not flagged, and instead control just flows all the
130 way through bbs as usual.
131 */
132
133 /* "Special" instructions.
134
135 This instruction decoder can decode three special instructions
136 which mean nothing natively (are no-ops as far as regs/mem are
137 concerned) but have meaning for supporting Valgrind. A special
138 instruction is flagged by the 12-byte preamble C1C703 C1C70D C1C71D
139 C1C713 (in the standard interpretation, that means: roll $3, %edi;
140 roll $13, %edi; roll $29, %edi; roll $19, %edi). Following that,
141 one of the following 3 are allowed (standard interpretation in
142 parentheses):
143
144 87DB (xchgl %ebx,%ebx) %EDX = client_request ( %EAX )
145 87C9 (xchgl %ecx,%ecx) %EAX = guest_NRADDR
146 87D2 (xchgl %edx,%edx) call-noredir *%EAX
147 87FF (xchgl %edi,%edi) IR injection
148
149 Any other bytes following the 12-byte preamble are illegal and
150 constitute a failure in instruction decoding. This all assumes
151 that the preamble will never occur except in specific code
152 fragments designed for Valgrind to catch.
153
154 No prefixes may precede a "Special" instruction.
155 */
156
157 /* LOCK prefixed instructions. These are translated using IR-level
158 CAS statements (IRCAS) and are believed to preserve atomicity, even
159 from the point of view of some other process racing against a
160 simulated one (presumably they communicate via a shared memory
161 segment).
162
163 Handlers which are aware of LOCK prefixes are:
164 dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
165 dis_cmpxchg_G_E (cmpxchg)
166 dis_Grp1 (add, or, adc, sbb, and, sub, xor)
167 dis_Grp3 (not, neg)
168 dis_Grp4 (inc, dec)
169 dis_Grp5 (inc, dec)
170 dis_Grp8_Imm (bts, btc, btr)
171 dis_bt_G_E (bts, btc, btr)
172 dis_xadd_G_E (xadd)
173 */
174
175
176 #include "libvex_basictypes.h"
177 #include "libvex_ir.h"
178 #include "libvex.h"
179 #include "libvex_guest_x86.h"
180
181 #include "main_util.h"
182 #include "main_globals.h"
183 #include "guest_generic_bb_to_IR.h"
184 #include "guest_generic_x87.h"
185 #include "guest_x86_defs.h"
186
187
188 /*------------------------------------------------------------*/
189 /*--- Globals ---*/
190 /*------------------------------------------------------------*/
191
192 /* These are set at the start of the translation of an insn, right
193 down in disInstr_X86, so that we don't have to pass them around
194 endlessly. They are all constant during the translation of any
195 given insn. */
196
197 /* We need to know this to do sub-register accesses correctly. */
198 static VexEndness host_endness;
199
200 /* Pointer to the guest code area (points to start of BB, not to the
201 insn being processed). */
202 static const UChar* guest_code;
203
204 /* The guest address corresponding to guest_code[0]. */
205 static Addr32 guest_EIP_bbstart;
206
207 /* The guest address for the instruction currently being
208 translated. */
209 static Addr32 guest_EIP_curr_instr;
210
211 /* The IRSB* into which we're generating code. */
212 static IRSB* irsb;
213
/* Whether or not we are in protected mode */
215 static Bool protected_mode;
216
217 /* The addr-op size of the instruction
218 * By default it is 4 for protected mode and 2 for real mode.
219 * If there is the 0x67 prefix it is swapped
220 */
221 static Int current_sz_addr;
222
223 /* The data-op size of the instruction
224 * By default it is 4 for protected mode and 2 for real mode.
225 * If there is the 0x66 prefix it is swapped
226 */
227 static Int current_sz_data;
228
229
230 /*------------------------------------------------------------*/
231 /*--- Debugging output ---*/
232 /*------------------------------------------------------------*/
233
234 #ifndef _MSC_VER
235 #define DIP(format, args...) \
236 if (vex_traceflags & VEX_TRACE_FE) \
237 vex_printf(format, ## args)
238
239 #define DIS(buf, format, args...) \
240 if (vex_traceflags & VEX_TRACE_FE) \
241 vex_sprintf(buf, format, ## args)
242 #else
243 #define DIP(format, ...) \
244 if (vex_traceflags & VEX_TRACE_FE) \
245 vex_printf(format, __VA_ARGS__)
246
247 #define DIS(buf, format, ...) \
248 if (vex_traceflags & VEX_TRACE_FE) \
249 vex_sprintf(buf, format, __VA_ARGS__)
250 #endif
251
252
253
254 /*------------------------------------------------------------*/
255 /*--- Offsets of various parts of the x86 guest state. ---*/
256 /*------------------------------------------------------------*/
257
258 #define OFFB_EAX offsetof(VexGuestX86State,guest_EAX)
259 #define OFFB_EBX offsetof(VexGuestX86State,guest_EBX)
260 #define OFFB_ECX offsetof(VexGuestX86State,guest_ECX)
261 #define OFFB_EDX offsetof(VexGuestX86State,guest_EDX)
262 #define OFFB_ESP offsetof(VexGuestX86State,guest_ESP)
263 #define OFFB_EBP offsetof(VexGuestX86State,guest_EBP)
264 #define OFFB_ESI offsetof(VexGuestX86State,guest_ESI)
265 #define OFFB_EDI offsetof(VexGuestX86State,guest_EDI)
266
267 #define OFFB_EIP offsetof(VexGuestX86State,guest_EIP)
268
269 #define OFFB_CC_OP offsetof(VexGuestX86State,guest_CC_OP)
270 #define OFFB_CC_DEP1 offsetof(VexGuestX86State,guest_CC_DEP1)
271 #define OFFB_CC_DEP2 offsetof(VexGuestX86State,guest_CC_DEP2)
272 #define OFFB_CC_NDEP offsetof(VexGuestX86State,guest_CC_NDEP)
273
274 #define OFFB_FPREGS offsetof(VexGuestX86State,guest_FPREG[0])
275 #define OFFB_FPTAGS offsetof(VexGuestX86State,guest_FPTAG[0])
276 #define OFFB_DFLAG offsetof(VexGuestX86State,guest_DFLAG)
277 #define OFFB_IDFLAG offsetof(VexGuestX86State,guest_IDFLAG)
278 #define OFFB_ACFLAG offsetof(VexGuestX86State,guest_ACFLAG)
279 #define OFFB_FTOP offsetof(VexGuestX86State,guest_FTOP)
280 #define OFFB_FC3210 offsetof(VexGuestX86State,guest_FC3210)
281 #define OFFB_FPROUND offsetof(VexGuestX86State,guest_FPROUND)
282
283 #define OFFB_CS offsetof(VexGuestX86State,guest_CS)
284 #define OFFB_DS offsetof(VexGuestX86State,guest_DS)
285 #define OFFB_ES offsetof(VexGuestX86State,guest_ES)
286 #define OFFB_FS offsetof(VexGuestX86State,guest_FS)
287 #define OFFB_GS offsetof(VexGuestX86State,guest_GS)
288 #define OFFB_SS offsetof(VexGuestX86State,guest_SS)
289 #define OFFB_LDT offsetof(VexGuestX86State,guest_LDT)
290 #define OFFB_GDT offsetof(VexGuestX86State,guest_GDT)
291
292 #define OFFB_SSEROUND offsetof(VexGuestX86State,guest_SSEROUND)
293 #define OFFB_XMM0 offsetof(VexGuestX86State,guest_XMM0)
294 #define OFFB_XMM1 offsetof(VexGuestX86State,guest_XMM1)
295 #define OFFB_XMM2 offsetof(VexGuestX86State,guest_XMM2)
296 #define OFFB_XMM3 offsetof(VexGuestX86State,guest_XMM3)
297 #define OFFB_XMM4 offsetof(VexGuestX86State,guest_XMM4)
298 #define OFFB_XMM5 offsetof(VexGuestX86State,guest_XMM5)
299 #define OFFB_XMM6 offsetof(VexGuestX86State,guest_XMM6)
300 #define OFFB_XMM7 offsetof(VexGuestX86State,guest_XMM7)
301
302 #define OFFB_EMNOTE offsetof(VexGuestX86State,guest_EMNOTE)
303
304 #define OFFB_CMSTART offsetof(VexGuestX86State,guest_CMSTART)
305 #define OFFB_CMLEN offsetof(VexGuestX86State,guest_CMLEN)
306 #define OFFB_NRADDR offsetof(VexGuestX86State,guest_NRADDR)
307
308 #define OFFB_IP_AT_SYSCALL offsetof(VexGuestX86State,guest_IP_AT_SYSCALL)
309
310
311 /*------------------------------------------------------------*/
312 /*--- Helper bits and pieces for deconstructing the ---*/
313 /*--- x86 insn stream. ---*/
314 /*------------------------------------------------------------*/
315
316 /* This is the Intel register encoding -- integer regs. */
317 #define R_EAX 0
318 #define R_ECX 1
319 #define R_EDX 2
320 #define R_EBX 3
321 #define R_ESP 4
322 #define R_EBP 5
323 #define R_ESI 6
324 #define R_EDI 7
325
326 #define R_AL (0+R_EAX)
327 #define R_AH (4+R_EAX)
328
329 /* This is the Intel register encoding -- segment regs. */
330 #define R_ES 0
331 #define R_CS 1
332 #define R_SS 2
333 #define R_DS 3
334 #define R_FS 4
335 #define R_GS 5
336
337
338 /* Add a statement to the list held by "irbb". */
static void stmt ( IRStmt* st )
{
   /* Append 'st' to the IRSB currently under construction (the
      file-scope 'irsb'). */
   addStmtToIRSB( irsb, st );
}

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   /* Guard against nonsense types before allocating in the tyenv. */
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}
350
351 /* Various simple conversions */
352
static UInt extend_s_8to32( UInt x )
{
   /* Keep the low 8 bits of x and replicate bit 7 up to bit 31. */
   UInt lo8 = x & 0xFF;
   return (lo8 & 0x80) ? (lo8 | 0xFFFFFF00u) : lo8;
}

static UInt extend_s_16to32 ( UInt x )
{
   /* Keep the low 16 bits of x and replicate bit 15 up to bit 31. */
   UInt lo16 = x & 0xFFFF;
   return (lo16 & 0x8000) ? (lo16 | 0xFFFF0000u) : lo16;
}
362
363 /* Fetch a byte from the guest insn stream. */
static UChar getIByte ( Int delta )
{
   /* Byte at offset 'delta' from the start of the guest code block. */
   return guest_code[delta];
}

/* Extract the reg field from a modRM byte. */
static Int gregOfRM ( UChar mod_reg_rm )
{
   /* reg/opcode is bits 5..3 of the modRM byte. */
   return (Int)( (mod_reg_rm >> 3) & 7 );
}

/* Figure out whether the mod and rm parts of a modRM byte refer to a
   register or memory.  If so, the byte will have the form 11XXXYYY,
   where YYY is the register number. */
static Bool epartIsReg ( UChar mod_reg_rm )
{
   /* mod == 3 (both top bits set) means the E part is a register. */
   return toBool(0xC0 == (mod_reg_rm & 0xC0));
}

/* ... and extract the register number ... */
static Int eregOfRM ( UChar mod_reg_rm )
{
   /* rm is bits 2..0 of the modRM byte. */
   return (Int)(mod_reg_rm & 0x7);
}
388
389 /* Get a 8/16/32-bit unsigned value out of the insn stream. */
390
getUChar(Int delta)391 static UChar getUChar ( Int delta )
392 {
393 UChar v = guest_code[delta+0];
394 return toUChar(v);
395 }
396
getUDisp16(Int delta)397 static UInt getUDisp16 ( Int delta )
398 {
399 UInt v = guest_code[delta+1]; v <<= 8;
400 v |= guest_code[delta+0];
401 return v & 0xFFFF;
402 }
403
getUDisp32(Int delta)404 static UInt getUDisp32 ( Int delta )
405 {
406 UInt v = guest_code[delta+3]; v <<= 8;
407 v |= guest_code[delta+2]; v <<= 8;
408 v |= guest_code[delta+1]; v <<= 8;
409 v |= guest_code[delta+0];
410 return v;
411 }
412
getUDisp(Int size,Int delta)413 static UInt getUDisp ( Int size, Int delta )
414 {
415 switch (size) {
416 case 4: return getUDisp32(delta);
417 case 2: return getUDisp16(delta);
418 case 1: return (UInt)getUChar(delta);
419 default: vpanic("getUDisp(x86)");
420 }
421 return 0; /*notreached*/
422 }
423
424
425 /* Get a byte value out of the insn stream and sign-extend to 32
426 bits. */
getSDisp8(Int delta)427 static UInt getSDisp8 ( Int delta )
428 {
429 return extend_s_8to32( (UInt) (guest_code[delta]) );
430 }
431
getSDisp16(Int delta0)432 static UInt getSDisp16 ( Int delta0 )
433 {
434 const UChar* eip = &guest_code[delta0];
435 UInt d = *eip++;
436 d |= ((*eip++) << 8);
437 return extend_s_16to32(d);
438 }
439
getSDisp(Int size,Int delta)440 static UInt getSDisp ( Int size, Int delta )
441 {
442 switch (size) {
443 case 4: return getUDisp32(delta);
444 case 2: return getSDisp16(delta);
445 case 1: return getSDisp8(delta);
446 default: vpanic("getSDisp(x86)");
447 }
448 return 0; /*notreached*/
449 }
450
451
452 /*------------------------------------------------------------*/
453 /*--- Helpers for constructing IR. ---*/
454 /*------------------------------------------------------------*/
455
456 /* Create a 1/2/4 byte read of an x86 integer registers. For 16/8 bit
457 register references, we need to take the host endianness into
458 account. Supplied value is 0 .. 7 and in the Intel instruction
459 encoding. */
460
szToITy(Int n)461 static IRType szToITy ( Int n )
462 {
463 switch (n) {
464 case 1: return Ity_I8;
465 case 2: return Ity_I16;
466 case 4: return Ity_I32;
467 default: vpanic("szToITy(x86)");
468 }
469 }
470
/* On a little-endian host, less significant bits of the guest
   registers are at lower addresses.  Therefore, a reference to a
   register low half has the same guest state offset as a reference to
   the full register.
*/
static Int integerGuestRegOffset ( Int sz, UInt archreg )
{
   vassert(archreg < 8);

   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);

   /* 32-bit, 16-bit and low-byte (AL/CL/DL/BL, encodings 0..3)
      references all start at the base offset of the full register on
      a little-endian host. */
   if (sz == 4 || sz == 2 || (sz == 1 && archreg < 4)) {
      switch (archreg) {
         case R_EAX: return OFFB_EAX;
         case R_EBX: return OFFB_EBX;
         case R_ECX: return OFFB_ECX;
         case R_EDX: return OFFB_EDX;
         case R_ESI: return OFFB_ESI;
         case R_EDI: return OFFB_EDI;
         case R_ESP: return OFFB_ESP;
         case R_EBP: return OFFB_EBP;
         default: vpanic("integerGuestRegOffset(x86,le)(4,2)");
      }
   }

   /* Only the high-byte registers AH/CH/DH/BH (encodings 4..7)
      remain; they live one byte above the base of the corresponding
      full register. */
   vassert(archreg >= 4 && archreg < 8 && sz == 1);
   switch (archreg-4) {
      case R_EAX: return 1+ OFFB_EAX;
      case R_EBX: return 1+ OFFB_EBX;
      case R_ECX: return 1+ OFFB_ECX;
      case R_EDX: return 1+ OFFB_EDX;
      default: vpanic("integerGuestRegOffset(x86,le)(1h)");
   }

   /* NOTREACHED */
   vpanic("integerGuestRegOffset(x86,le)");
}
509
segmentGuestRegOffset(UInt sreg)510 static Int segmentGuestRegOffset ( UInt sreg )
511 {
512 switch (sreg) {
513 case R_ES: return OFFB_ES;
514 case R_CS: return OFFB_CS;
515 case R_SS: return OFFB_SS;
516 case R_DS: return OFFB_DS;
517 case R_FS: return OFFB_FS;
518 case R_GS: return OFFB_GS;
519 default: vpanic("segmentGuestRegOffset(x86)");
520 }
521 }
522
xmmGuestRegOffset(UInt xmmreg)523 static Int xmmGuestRegOffset ( UInt xmmreg )
524 {
525 switch (xmmreg) {
526 case 0: return OFFB_XMM0;
527 case 1: return OFFB_XMM1;
528 case 2: return OFFB_XMM2;
529 case 3: return OFFB_XMM3;
530 case 4: return OFFB_XMM4;
531 case 5: return OFFB_XMM5;
532 case 6: return OFFB_XMM6;
533 case 7: return OFFB_XMM7;
534 default: vpanic("xmmGuestRegOffset");
535 }
536 }
537
538 /* Lanes of vector registers are always numbered from zero being the
539 least significant lane (rightmost in the register). */
540
static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
{
   /* Offset of 16-bit lane 'laneno' within XMM register 'xmmreg'. */
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 8);
   return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
}

static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
{
   /* Offset of 32-bit lane 'laneno' within XMM register 'xmmreg'. */
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 4);
   return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
}

static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
{
   /* Offset of 64-bit lane 'laneno' within XMM register 'xmmreg'. */
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 2);
   return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
}
564
static IRExpr* getIReg ( Int sz, UInt archreg )
{
   /* Read a 1/2/4-byte slice of integer register 'archreg' (Intel
      encoding, 0..7). */
   vassert(sz == 1 || sz == 2 || sz == 4);
   vassert(archreg < 8);
   return IRExpr_Get( integerGuestRegOffset(sz,archreg),
                      szToITy(sz) );
}

/* Ditto, but write to a reg instead. */
static void putIReg ( Int sz, UInt archreg, IRExpr* e )
{
   IRType ty = typeOfIRExpr(irsb->tyenv, e);
   /* The value being written must match the declared size exactly. */
   switch (sz) {
      case 1: vassert(ty == Ity_I8);  break;
      case 2: vassert(ty == Ity_I16); break;
      case 4: vassert(ty == Ity_I32); break;
      default: vpanic("putIReg(x86)");
   }
   vassert(archreg < 8);
   stmt( IRStmt_Put(integerGuestRegOffset(sz,archreg), e) );
}
586
static IRExpr* getSReg ( UInt sreg )
{
   /* Read a segment register; these are 16 bits wide. */
   return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 );
}

static void putSReg ( UInt sreg, IRExpr* e )
{
   /* Write a segment register; the value must be Ity_I16. */
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   stmt( IRStmt_Put( segmentGuestRegOffset(sreg), e ) );
}

static IRExpr* getXMMReg ( UInt xmmreg )
{
   /* Read a whole 128-bit XMM register. */
   return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
}
602
static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
{
   /* Read lane 'laneno' of an XMM register as a 64-bit integer. */
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
}

static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
{
   /* Same lane, but viewed as a 64-bit float. */
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
}

static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
{
   /* Read lane 'laneno' of an XMM register as a 32-bit integer. */
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
}

static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
{
   /* Same lane, but viewed as a 32-bit float. */
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
}
622
static void putXMMReg ( UInt xmmreg, IRExpr* e )
{
   /* Write a whole 128-bit XMM register. */
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
}

static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   /* Write one 64-bit integer lane of an XMM register. */
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   /* Write one 64-bit float lane of an XMM register. */
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   /* Write one 32-bit float lane of an XMM register. */
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   /* Write one 32-bit integer lane of an XMM register. */
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   /* Write one 16-bit integer lane of an XMM register. */
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) );
}
658
static void assign ( IRTemp dst, IRExpr* e )
{
   /* dst := e, as an IR temp-write statement. */
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   /* Little-endian store of 'data' at 'addr'. */
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

/* Thin wrappers around the IR expression-node constructors. */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   /* Read the value of an IR temporary. */
   return IRExpr_RdTmp(tmp);
}
688
static IRExpr* mkU8 ( UInt i )
{
   /* 8-bit constant; value must fit. */
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkU16 ( UInt i )
{
   /* 16-bit constant; value must fit. */
   vassert(i < 65536);
   return IRExpr_Const(IRConst_U16( (UShort)i ));
}

static IRExpr* mkU32 ( UInt i )
{
   /* 32-bit constant. */
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU64 ( ULong i )
{
   /* 64-bit constant. */
   return IRExpr_Const(IRConst_U64(i));
}
710
mkU(IRType ty,UInt i)711 static IRExpr* mkU ( IRType ty, UInt i )
712 {
713 if (ty == Ity_I8) return mkU8(i);
714 if (ty == Ity_I16) return mkU16(i);
715 if (ty == Ity_I32) return mkU32(i);
716 /* If this panics, it usually means you passed a size (1,2,4)
717 value as the IRType, rather than a real IRType. */
718 vpanic("mkU(x86)");
719 }
720
static IRExpr* mkV128 ( UShort mask )
{
   /* 128-bit vector constant, described by a 16-bit section mask. */
   return IRExpr_Const(IRConst_V128(mask));
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   /* Little-endian load of type 'ty' from 'addr'. */
   return IRExpr_Load(Iend_LE, ty, addr);
}
730
static IROp mkSizedOp ( IRType ty, IROp op8 )
{
   /* Given the 8-bit variant of an op, derive the variant matching
      'ty'.  Relies on the 8/16/32-bit forms of each op being
      consecutively numbered in the IROp enumeration. */
   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
   vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
           || op8 == Iop_Mul8
           || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
           || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
           || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
           || op8 == Iop_CasCmpNE8
           || op8 == Iop_ExpCmpNE8
           || op8 == Iop_Not8);
   switch (ty) {
      case Ity_I8:  return op8;
      case Ity_I16: return op8 + 1;
      default:      return op8 + 2; /* Ity_I32, per the assert above */
   }
}
746
mkWidenOp(Int szSmall,Int szBig,Bool signd)747 static IROp mkWidenOp ( Int szSmall, Int szBig, Bool signd )
748 {
749 if (szSmall == 1 && szBig == 4) {
750 return signd ? Iop_8Sto32 : Iop_8Uto32;
751 }
752 if (szSmall == 1 && szBig == 2) {
753 return signd ? Iop_8Sto16 : Iop_8Uto16;
754 }
755 if (szSmall == 2 && szBig == 4) {
756 return signd ? Iop_16Sto32 : Iop_16Uto32;
757 }
758 vpanic("mkWidenOp(x86,guest)");
759 }
760
static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
{
   /* Logical AND of two Ity_I1 values: widen both to I32, AND them,
      and narrow the result back to I1. */
   IRExpr* wx;
   IRExpr* wy;
   vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
   vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
   wx = unop(Iop_1Uto32, x);
   wy = unop(Iop_1Uto32, y);
   return unop(Iop_32to1, binop(Iop_And32, wx, wy));
}
770
771 /* Generate a compare-and-swap operation, operating on memory at
772 'addr'. The expected value is 'expVal' and the new value is
773 'newVal'. If the operation fails, then transfer control (with a
774 no-redir jump (XXX no -- see comment at top of this file)) to
775 'restart_point', which is presumably the address of the guest
776 instruction again -- retrying, essentially. */
static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
                    Addr32 restart_point )
{
   IRCAS* cas;
   IRType tyE    = typeOfIRExpr(irsb->tyenv, expVal);
   IRType tyN    = typeOfIRExpr(irsb->tyenv, newVal);
   IRTemp oldTmp = newTemp(tyE);
   IRTemp expTmp = newTemp(tyE);
   /* Expected and new values must have the same, integral, type. */
   vassert(tyE == tyN);
   vassert(tyE == Ity_I32 || tyE == Ity_I16 || tyE == Ity_I8);
   assign(expTmp, expVal);
   /* oldTmp receives whatever was in memory at 'addr'; the store of
      newVal happens only if that equals expTmp. */
   cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
                  NULL, mkexpr(expTmp), NULL, newVal );
   stmt( IRStmt_CAS(cas) );
   /* If the CAS failed (old value differed from expected), side-exit
      back to 'restart_point' and retry the guest insn.  The CasCmp
      variant (rather than plain CmpNE) tells Memcheck not to
      definedness-check this comparison -- see the note at the top of
      this file. */
   stmt( IRStmt_Exit(
            binop( mkSizedOp(tyE,Iop_CasCmpNE8),
                   mkexpr(oldTmp), mkexpr(expTmp) ),
            Ijk_Boring, /*Ijk_NoRedir*/
            IRConst_U32( restart_point ),
            OFFB_EIP
         ));
}
799
800
801 /*------------------------------------------------------------*/
802 /*--- Helpers for %eflags. ---*/
803 /*------------------------------------------------------------*/
804
805 /* -------------- Evaluating the flags-thunk. -------------- */
806
807 /* Build IR to calculate all the eflags from stored
808 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
809 Ity_I32. */
static IRExpr* mk_x86g_calculate_eflags_all ( void )
{
   /* Pass the entire flags thunk (OP/DEP1/DEP2/NDEP) to the clean
      helper, which returns all eflags as an I32. */
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "x86g_calculate_eflags_all", &x86g_calculate_eflags_all,
           args
        );
   /* Exclude OP (arg 0) and NDEP (arg 3) from definedness checking.
      We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
829
830 /* Build IR to calculate some particular condition from stored
831 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
832 Ity_Bit. */
static IRExpr* mk_x86g_calculate_condition ( X86Condcode cond )
{
   /* Evaluate condition 'cond' against the stored flags thunk; the
      helper's I32 result is narrowed to an Ity_I1. */
   IRExpr** args
      = mkIRExprVec_5( mkU32(cond),
                       IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "x86g_calculate_condition", &x86g_calculate_condition,
           args
        );
   /* Exclude the requested condition (arg 0), OP (arg 1) and NDEP
      (arg 4) from definedness checking.  We're only interested in
      DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
   return unop(Iop_32to1, call);
}
853
854 /* Build IR to calculate just the carry flag from stored
855 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I32. */
static IRExpr* mk_x86g_calculate_eflags_c ( void )
{
   /* Compute just the carry flag from the stored thunk, as an I32. */
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           3/*regparm*/,
           "x86g_calculate_eflags_c", &x86g_calculate_eflags_c,
           args
        );
   /* Exclude OP (arg 0) and NDEP (arg 3) from definedness checking.
      We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
875
876
877 /* -------------- Building the flags-thunk. -------------- */
878
879 /* The machinery in this section builds the flag-thunk following a
880 flag-setting operation. Hence the various setFlags_* functions.
881 */
882
static Bool isAddSub ( IROp op8 )
{
   /* True iff op8 is the 8-bit add or sub op. */
   return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
}

static Bool isLogic ( IROp op8 )
{
   /* True iff op8 is the 8-bit and, or, or xor op. */
   return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
}
892
893 /* U-widen 8/16/32 bit int expr to 32. */
static IRExpr* widenUto32 ( IRExpr* e )
{
   /* Zero-extend an 8/16-bit expression to 32 bits; 32-bit input is
      passed through unchanged. */
   IRType ty = typeOfIRExpr(irsb->tyenv, e);
   if (ty == Ity_I32) return e;
   if (ty == Ity_I16) return unop(Iop_16Uto32, e);
   if (ty == Ity_I8)  return unop(Iop_8Uto32, e);
   vpanic("widenUto32");
}

static IRExpr* widenSto32 ( IRExpr* e )
{
   /* Sign-extend an 8/16-bit expression to 32 bits; 32-bit input is
      passed through unchanged. */
   IRType ty = typeOfIRExpr(irsb->tyenv, e);
   if (ty == Ity_I32) return e;
   if (ty == Ity_I16) return unop(Iop_16Sto32, e);
   if (ty == Ity_I8)  return unop(Iop_8Sto32, e);
   vpanic("widenSto32");
}
914
915 /* Narrow 8/16/32 bit int expr to 8/16/32. Clearly only some
916 of these combinations make sense. */
static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
{
   /* Narrow 'e' to dst_ty.  Only the identity case and 32->16 /
      32->8 narrowings are supported; anything else is a panic. */
   IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
   if (src_ty == dst_ty)
      return e;
   if (src_ty == Ity_I32) {
      if (dst_ty == Ity_I16) return unop(Iop_32to16, e);
      if (dst_ty == Ity_I8)  return unop(Iop_32to8, e);
   }
   vex_printf("\nsrc, dst tys are: ");
   ppIRType(src_ty);
   vex_printf(", ");
   ppIRType(dst_ty);
   vex_printf("\n");
   vpanic("narrowTo(x86)");
}
934
935
936 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
937 auto-sized up to the real op. */
938
939 static
setFlags_DEP1_DEP2(IROp op8,IRTemp dep1,IRTemp dep2,IRType ty)940 void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
941 {
942 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
943
944 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
945
946 switch (op8) {
947 case Iop_Add8: ccOp += X86G_CC_OP_ADDB; break;
948 case Iop_Sub8: ccOp += X86G_CC_OP_SUBB; break;
949 default: ppIROp(op8);
950 vpanic("setFlags_DEP1_DEP2(x86)");
951 }
952 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) );
953 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
954 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(dep2))) );
955 /* Set NDEP even though it isn't used. This makes redundant-PUT
956 elimination of previous stores to this field work better. */
957 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
958 }
959
960
961 /* Set the OP and DEP1 fields only, and write zero to DEP2. */
962
963 static
setFlags_DEP1(IROp op8,IRTemp dep1,IRType ty)964 void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
965 {
966 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
967
968 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
969
970 switch (op8) {
971 case Iop_Or8:
972 case Iop_And8:
973 case Iop_Xor8: ccOp += X86G_CC_OP_LOGICB; break;
974 default: ppIROp(op8);
975 vpanic("setFlags_DEP1(x86)");
976 }
977 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) );
978 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
979 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
980 /* Set NDEP even though it isn't used. This makes redundant-PUT
981 elimination of previous stores to this field work better. */
982 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
983 }
984
985
/* For shift operations, we put in the result and the undershifted
   result.  Except if the shift amount is zero, the thunk is left
   unchanged: x86 semantics leave the flags alone for a zero-count
   shift, so every thunk field is written conditionally via ITE,
   keeping its old value in that case.  DEP2 (the value shifted by
   one bit less than requested) lets the helper recover the last bit
   shifted out, i.e. the carry. */

static void setFlags_DEP1_DEP2_shift ( IROp op32,
                                       IRTemp res,
                                       IRTemp resUS,
                                       IRType ty,
                                       IRTemp guard )
{
   /* 2/1/0 for I8/I16/I32: a *downward* offset from the 32-bit ("L")
      thunk opcode, relying on the B/W/L enum values being adjacent. */
   Int ccOp = ty==Ity_I8 ? 2 : (ty==Ity_I16 ? 1 : 0);

   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
   vassert(guard);

   /* Both kinds of right shifts are handled by the same thunk
      operation. */
   switch (op32) {
      case Iop_Shr32:
      case Iop_Sar32: ccOp = X86G_CC_OP_SHRL - ccOp; break;
      case Iop_Shl32: ccOp = X86G_CC_OP_SHLL - ccOp; break;
      default: ppIROp(op32);
               vpanic("setFlags_DEP1_DEP2_shift(x86)");
   }

   /* guard :: Ity_I8.  We need to convert it to I1. */
   IRTemp guardB = newTemp(Ity_I1);
   assign( guardB, binop(Iop_CmpNE8, mkexpr(guard), mkU8(0)) );

   /* DEP1 contains the result, DEP2 contains the undershifted value. */
   stmt( IRStmt_Put( OFFB_CC_OP,
                     IRExpr_ITE( mkexpr(guardB),
                                 mkU32(ccOp),
                                 IRExpr_Get(OFFB_CC_OP,Ity_I32) ) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     IRExpr_ITE( mkexpr(guardB),
                                 widenUto32(mkexpr(res)),
                                 IRExpr_Get(OFFB_CC_DEP1,Ity_I32) ) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2,
                     IRExpr_ITE( mkexpr(guardB),
                                 widenUto32(mkexpr(resUS)),
                                 IRExpr_Get(OFFB_CC_DEP2,Ity_I32) ) ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP,
                     IRExpr_ITE( mkexpr(guardB),
                                 mkU32(0),
                                 IRExpr_Get(OFFB_CC_NDEP,Ity_I32) ) ));
}
1035
1036
/* For the inc/dec case, we store in DEP1 the result value and in NDEP
   the former value of the carry flag, which unfortunately we have to
   compute.  (INC/DEC set all arithmetic flags except C, so the old C
   must be preserved through the thunk.) */

static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
{
   /* Base opcode picks INC vs DEC; adding 0/1/2 picks the B/W/L width
      variant (relies on those enum values being adjacent). */
   Int ccOp = inc ? X86G_CC_OP_INCB : X86G_CC_OP_DECB;

   ccOp += ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);

   /* This has to come first, because calculating the C flag
      may require reading all four thunk fields. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mk_x86g_calculate_eflags_c()) );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(res))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
}
1055
1056
1057 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
1058 two arguments. */
1059
1060 static
setFlags_MUL(IRType ty,IRTemp arg1,IRTemp arg2,UInt base_op)1061 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, UInt base_op )
1062 {
1063 switch (ty) {
1064 case Ity_I8:
1065 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+0) ) );
1066 break;
1067 case Ity_I16:
1068 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+1) ) );
1069 break;
1070 case Ity_I32:
1071 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+2) ) );
1072 break;
1073 default:
1074 vpanic("setFlags_MUL(x86)");
1075 }
1076 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(arg1)) ));
1077 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(arg2)) ));
1078 /* Set NDEP even though it isn't used. This makes redundant-PUT
1079 elimination of previous stores to this field work better. */
1080 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
1081 }
1082
1083
1084 /* -------------- Condition codes. -------------- */
1085
1086 /* Condition codes, using the Intel encoding. */
1087
/* Mnemonic suffix for an X86Condcode (Intel encoding), as it appears
   in Jcc/SETcc/CMOVcc instruction names.  Used for disassembly
   printing only. */
static const HChar* name_X86Condcode ( X86Condcode cond )
{
   switch (cond) {
      case X86CondO:      return "o";
      case X86CondNO:     return "no";
      case X86CondB:      return "b";
      case X86CondNB:     return "nb";
      case X86CondZ:      return "z";
      case X86CondNZ:     return "nz";
      case X86CondBE:     return "be";
      case X86CondNBE:    return "nbe";
      case X86CondS:      return "s";
      case X86CondNS:     return "ns";
      case X86CondP:      return "p";
      case X86CondNP:     return "np";
      case X86CondL:      return "l";
      case X86CondNL:     return "nl";
      case X86CondLE:     return "le";
      case X86CondNLE:    return "nle";
      case X86CondAlways: return "ALWAYS";
      default: vpanic("name_X86Condcode");
   }
}
1111
1112 static
positiveIse_X86Condcode(X86Condcode cond,Bool * needInvert)1113 X86Condcode positiveIse_X86Condcode ( X86Condcode cond,
1114 Bool* needInvert )
1115 {
1116 vassert(cond >= X86CondO && cond <= X86CondNLE);
1117 if (cond & 1) {
1118 *needInvert = True;
1119 return cond-1;
1120 } else {
1121 *needInvert = False;
1122 return cond;
1123 }
1124 }
1125
1126
1127 /* -------------- Helpers for ADD/SUB with carry. -------------- */
1128
/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
   appropriately.

   Optionally, generate a store for the 'tres' value.  This can either
   be a normal store, or it can be a cas-with-possible-failure style
   store:

     if taddr is IRTemp_INVALID, then no store is generated.

     if taddr is not IRTemp_INVALID, then a store (using taddr as
     the address) is generated:

       if texpVal is IRTemp_INVALID then a normal store is
       generated, and restart_point must be zero (it is irrelevant).

       if texpVal is not IRTemp_INVALID then a cas-style store is
       generated.  texpVal is the expected value, restart_point
       is the restart point if the store fails, and texpVal must
       have the same type as tres.
*/
static void helper_ADC ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
   UInt thunkOp;
   IRType ty = szToITy(sz);
   IRTemp oldc = newTemp(Ity_I32);
   IRTemp oldcn = newTemp(ty);          /* carry narrowed to operand width */
   IROp plus = mkSizedOp(ty, Iop_Add8);
   IROp xor = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
   vassert(sz == 1 || sz == 2 || sz == 4);
   thunkOp = sz==4 ? X86G_CC_OP_ADCL
             : (sz==2 ? X86G_CC_OP_ADCW : X86G_CC_OP_ADCB);

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And32,
                       mk_x86g_calculate_eflags_c(),
                       mkU32(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   /* tres = ta1 + ta2 + old_carry */
   assign( tres, binop(plus,
                       binop(plus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   /* NOTE: DEP2 is ta2 ^ old_carry rather than plain ta2 --
      presumably so the eflags helper can recover both ta2 and the
      carry (also saved in NDEP); confirm against
      x86g_calculate_eflags_* in the helpers file. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
                                                    mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}
1197
1198
/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
   appropriately.  As with helper_ADC, possibly generate a store of
   the result -- see comments on helper_ADC for details.
*/
static void helper_SBB ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
   UInt thunkOp;
   IRType ty = szToITy(sz);
   IRTemp oldc = newTemp(Ity_I32);
   IRTemp oldcn = newTemp(ty);          /* borrow narrowed to operand width */
   IROp minus = mkSizedOp(ty, Iop_Sub8);
   IROp xor = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
   vassert(sz == 1 || sz == 2 || sz == 4);
   thunkOp = sz==4 ? X86G_CC_OP_SBBL
             : (sz==2 ? X86G_CC_OP_SBBW : X86G_CC_OP_SBBB);

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And32,
                       mk_x86g_calculate_eflags_c(),
                       mkU32(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   /* tres = ta1 - ta2 - old_carry */
   assign( tres, binop(minus,
                       binop(minus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   /* NOTE: as in helper_ADC, DEP2 is ta2 ^ old_carry and the old
      carry itself goes in NDEP; see note there. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1) )) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
                                                    mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}
1251
1252
1253 /* -------------- Helpers for disassembly printing. -------------- */
1254
nameGrp1(Int opc_aux)1255 static const HChar* nameGrp1 ( Int opc_aux )
1256 {
1257 static const HChar* grp1_names[8]
1258 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
1259 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(x86)");
1260 return grp1_names[opc_aux];
1261 }
1262
nameGrp2(Int opc_aux)1263 static const HChar* nameGrp2 ( Int opc_aux )
1264 {
1265 static const HChar* grp2_names[8]
1266 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
1267 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(x86)");
1268 return grp2_names[opc_aux];
1269 }
1270
nameGrp4(Int opc_aux)1271 static const HChar* nameGrp4 ( Int opc_aux )
1272 {
1273 static const HChar* grp4_names[8]
1274 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
1275 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(x86)");
1276 return grp4_names[opc_aux];
1277 }
1278
nameGrp5(Int opc_aux)1279 static const HChar* nameGrp5 ( Int opc_aux )
1280 {
1281 static const HChar* grp5_names[8]
1282 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
1283 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(x86)");
1284 return grp5_names[opc_aux];
1285 }
1286
nameGrp8(Int opc_aux)1287 static const HChar* nameGrp8 ( Int opc_aux )
1288 {
1289 static const HChar* grp8_names[8]
1290 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
1291 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(x86)");
1292 return grp8_names[opc_aux];
1293 }
1294
nameIReg(Int size,Int reg)1295 static const HChar* nameIReg ( Int size, Int reg )
1296 {
1297 static const HChar* ireg32_names[8]
1298 = { "%eax", "%ecx", "%edx", "%ebx",
1299 "%esp", "%ebp", "%esi", "%edi" };
1300 static const HChar* ireg16_names[8]
1301 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" };
1302 static const HChar* ireg8_names[8]
1303 = { "%al", "%cl", "%dl", "%bl",
1304 "%ah{sp}", "%ch{bp}", "%dh{si}", "%bh{di}" };
1305 if (reg < 0 || reg > 7) goto bad;
1306 switch (size) {
1307 case 4: return ireg32_names[reg];
1308 case 2: return ireg16_names[reg];
1309 case 1: return ireg8_names[reg];
1310 }
1311 bad:
1312 vpanic("nameIReg(X86)");
1313 return NULL; /*notreached*/
1314 }
1315
/* Printable name of a segment register, given its R_xx index. */
static const HChar* nameSReg ( UInt sreg )
{
   switch (sreg) {
      case R_ES: return "%es";
      case R_CS: return "%cs";
      case R_SS: return "%ss";
      case R_DS: return "%ds";
      case R_FS: return "%fs";
      case R_GS: return "%gs";
      default: vpanic("nameSReg(x86)");
   }
}
1328
nameMMXReg(Int mmxreg)1329 static const HChar* nameMMXReg ( Int mmxreg )
1330 {
1331 static const HChar* mmx_names[8]
1332 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
1333 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(x86,guest)");
1334 return mmx_names[mmxreg];
1335 }
1336
nameXMMReg(Int xmmreg)1337 static const HChar* nameXMMReg ( Int xmmreg )
1338 {
1339 static const HChar* xmm_names[8]
1340 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
1341 "%xmm4", "%xmm5", "%xmm6", "%xmm7" };
1342 if (xmmreg < 0 || xmmreg > 7) vpanic("name_of_xmm_reg");
1343 return xmm_names[xmmreg];
1344 }
1345
nameMMXGran(Int gran)1346 static const HChar* nameMMXGran ( Int gran )
1347 {
1348 switch (gran) {
1349 case 0: return "b";
1350 case 1: return "w";
1351 case 2: return "d";
1352 case 3: return "q";
1353 default: vpanic("nameMMXGran(x86,guest)");
1354 }
1355 }
1356
nameISize(Int size)1357 static HChar nameISize ( Int size )
1358 {
1359 switch (size) {
1360 case 4: return 'l';
1361 case 2: return 'w';
1362 case 1: return 'b';
1363 default: vpanic("nameISize(x86)");
1364 }
1365 }
1366
1367
1368 /*------------------------------------------------------------*/
1369 /*--- JMP helpers ---*/
1370 /*------------------------------------------------------------*/
1371
/* Unconditional jump to a literal address: write it into EIP and
   mark this superblock as finished with the given jump kind. */
static void jmp_lit( /*MOD*/DisResult* dres,
                     IRJumpKind kind, Addr32 d32 )
{
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   stmt( IRStmt_Put( OFFB_EIP, mkU32(d32) ) );
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = kind;
}
1383
/* Unconditional jump to a computed address held in temp 't': write
   it into EIP and mark this superblock as finished. */
static void jmp_treg( /*MOD*/DisResult* dres,
                      IRJumpKind kind, IRTemp t )
{
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   stmt( IRStmt_Put( OFFB_EIP, mkexpr(t) ) );
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = kind;
}
1395
1396 static
jcc_01(DisResult * dres,X86Condcode cond,Addr32 d32_false,Addr32 d32_true)1397 void jcc_01( /*MOD*/DisResult* dres,
1398 X86Condcode cond, Addr32 d32_false, Addr32 d32_true )
1399 {
1400 Bool invert;
1401 X86Condcode condPos;
1402 vassert(dres->whatNext == Dis_Continue);
1403 vassert(dres->len == 0);
1404 vassert(dres->continueAt == 0);
1405 vassert(dres->jk_StopHere == Ijk_INVALID);
1406 dres->whatNext = Dis_StopHere;
1407 dres->jk_StopHere = Ijk_Boring;
1408 condPos = positiveIse_X86Condcode ( cond, &invert );
1409 if (invert) {
1410 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
1411 Ijk_Boring,
1412 IRConst_U32(d32_false),
1413 OFFB_EIP ) );
1414 stmt( IRStmt_Put( OFFB_EIP, mkU32(d32_true) ) );
1415 } else {
1416 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
1417 Ijk_Boring,
1418 IRConst_U32(d32_true),
1419 OFFB_EIP ) );
1420 stmt( IRStmt_Put( OFFB_EIP, mkU32(d32_false) ) );
1421 }
1422 }
1423
1424
1425 /*------------------------------------------------------------*/
1426 /*--- Disassembling addressing modes ---*/
1427 /*------------------------------------------------------------*/
1428
1429 static
sorbTxt(UChar sorb)1430 const HChar* sorbTxt ( UChar sorb )
1431 {
1432 switch (sorb) {
1433 case 0: return ""; /* no override */
1434 case 0x3E: return "%ds";
1435 case 0x26: return "%es:";
1436 case 0x64: return "%fs:";
1437 case 0x65: return "%gs:";
1438 case 0x2e: return "%cs:";
1439 case 0x36: return "%ss:";
1440 default: vpanic("sorbTxt(x86,guest)");
1441 }
1442 }
1443
1444
/* Translate the virtual address 'virtual' into a linear address by
   consulting the guest's LDT/GDT (whose base pointers live in the
   guest state) via the x86g_use_seg_selector helper.  On translation
   failure, emits an Ijk_MapFail side-exit back to the current
   instruction. */
static
IRExpr* handleSegOverrideAux ( IRTemp seg_selector, IRExpr* virtual )
{
   IRTemp ldt_ptr, gdt_ptr, r64;

   ldt_ptr = newTemp(Ity_I64);
   gdt_ptr = newTemp(Ity_I64);
   r64     = newTemp(Ity_I64);

   assign( ldt_ptr, IRExpr_Get( OFFB_LDT, Ity_I64 ));
   assign( gdt_ptr, IRExpr_Get( OFFB_GDT, Ity_I64 ));

   /*
   Call this to do the translation and limit checks:
   ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
                                 UInt seg_selector, UInt virtual_addr )
   */
   assign(
      r64,
      mkIRExprCCall(
         Ity_I64,
         0/*regparms*/,
         "x86g_use_seg_selector",
         &x86g_use_seg_selector,
         mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
                        mkexpr(seg_selector), virtual)
      )
   );

   /* If the high 32 of the result are non-zero, there was a
      failure in address translation.  In which case, make a
      quick exit.
   */
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
         Ijk_MapFail,
         IRConst_U32( guest_EIP_curr_instr ),
         OFFB_EIP
      )
   );

   /* otherwise, here's the translated result: the low 32 bits hold
      the linear address. */
   return unop(Iop_64to32, mkexpr(r64));
}
1490
1491 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
1492 linear address by adding any required segment override as indicated
1493 by sorb. */
1494 static
handleSegOverride(UChar sorb,IRExpr * virtual)1495 IRExpr* handleSegOverride ( UChar sorb, IRExpr* virtual )
1496 {
1497 Int sreg;
1498 IRTemp seg_selector;
1499
1500 if (sorb == 0)
1501 /* the common case - no override */
1502 return virtual;
1503
1504 switch (sorb) {
1505 case 0x3E: sreg = R_DS; break;
1506 case 0x26: sreg = R_ES; break;
1507 case 0x64: sreg = R_FS; break;
1508 case 0x65: sreg = R_GS; break;
1509 case 0x2E: sreg = R_CS; break;
1510 case 0x36: sreg = R_SS; break;
1511 default: vpanic("handleSegOverride(x86,guest)");
1512 }
1513
1514
1515 seg_selector = newTemp(Ity_I32);
1516 assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
1517
1518 return handleSegOverrideAux(seg_selector, virtual);
1519 }
1520
1521
1522 /* Generate IR to calculate an address indicated by a ModRM and
1523 following SIB bytes. The expression, and the number of bytes in
1524 the address mode, are returned. Note that this fn should not be
1525 called if the R/M part of the address denotes a register instead of
1526 memory. If print_codegen is true, text of the addressing mode is
1527 placed in buf.
1528
1529 The computed address is stored in a new tempreg, and the
1530 identity of the tempreg is returned. */
1531
disAMode_copy2tmp(IRExpr * addr)1532 static IRTemp disAMode_copy2tmp ( IRExpr* addr )
1533 {
1534 IRTemp tmp = newTemp(Ity_I32);
1535 IRTemp halfsize_tmp = IRTemp_INVALID;
1536
1537 if (current_sz_addr == 4) {
1538 assign( tmp, addr );
1539 } else {
1540 halfsize_tmp = newTemp(Ity_I16);
1541 assign(halfsize_tmp, addr);
1542 assign(tmp, unop(Iop_16Uto32, mkexpr(halfsize_tmp)));
1543 }
1544 return tmp;
1545 }
1546
/* 32-bit-address-size ModRM/SIB decoder.  See the block comment
   above for the contract: returns a temp holding the computed
   (segment-adjusted) address, sets *len to the number of amode
   bytes, and writes disassembly text to buf. */
static
IRTemp disAMode32 ( Int* len, UChar sorb, Int delta, HChar* buf )
{
   UChar mod_reg_rm = getIByte(delta);
   delta++;

   buf[0] = (UChar)0;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                     /* is now XX000YYY */
   mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                           /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                     /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* (%eax) .. (%edi), not including (%esp) or (%ebp).
         --> GET %reg, t
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         { UChar rm = mod_reg_rm;
           DIS(buf, "%s(%s)", sorbTxt(sorb), nameIReg(4,rm));
           *len = 1;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb, getIReg(4,rm)));
         }

      /* d8(%eax) ... d8(%edi), not including d8(%esp)
         --> GET %reg, t ; ADDL d8, t
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         { UChar rm = toUChar(mod_reg_rm & 7);
           UInt d = getSDisp8(delta);
           DIS(buf, "%s%d(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm));
           *len = 2;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb,
                     binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
         }

      /* d32(%eax) ... d32(%edi), not including d32(%esp)
         --> GET %reg, t ; ADDL d32, t
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         { UChar rm = toUChar(mod_reg_rm & 7);
           UInt d = getUDisp32(delta);
           DIS(buf, "%s0x%x(%s)", sorbTxt(sorb), d, nameIReg(4,rm));
           *len = 5;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb,
                     binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
         }

      /* a register, %eax .. %edi.  This shouldn't happen. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         vpanic("disAMode(x86): not an addr!");

      /* a 32-bit literal address
         --> MOV d32, tmp
      */
      case 0x05:
         { UInt d = getUDisp32(delta);
           *len = 5;
           DIS(buf, "%s(0x%x)", sorbTxt(sorb), d);
           return disAMode_copy2tmp(
                  handleSegOverride(sorb, mkU32(d)));
         }

      case 0x04: {
         /* SIB, with no displacement.  Special cases:
            -- %esp cannot act as an index value.
               If index_r indicates %esp, zero is used for the index.
            -- when mod is zero and base indicates EBP, base is instead
               a 32-bit literal.
            It's all madness, I tell you.  Extract %index, %base and
            scale from the SIB byte.  The value denoted is then:
               | %index == %ESP && %base == %EBP
                  = d32 following SIB byte
               | %index == %ESP && %base != %EBP
                  = %base
               | %index != %ESP && %base == %EBP
                  = d32 following SIB byte + (%index << scale)
               | %index != %ESP && %base != %ESP
                  = %base + (%index << scale)

            What happens to the souls of CPU architects who dream up such
            horrendous schemes, do you suppose?
         */
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         delta++;

         /* base + (index << scale) */
         if (index_r != R_ESP && base_r != R_EBP) {
            DIS(buf, "%s(%s,%s,%d)", sorbTxt(sorb),
                      nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 2;
            return
               disAMode_copy2tmp(
               handleSegOverride(sorb,
                  binop(Iop_Add32,
                        getIReg(4,base_r),
                        binop(Iop_Shl32, getIReg(4,index_r),
                              mkU8(scale)))));
         }

         /* d32 + (index << scale); no base register */
         if (index_r != R_ESP && base_r == R_EBP) {
            UInt d = getUDisp32(delta);
            DIS(buf, "%s0x%x(,%s,%d)", sorbTxt(sorb), d,
                      nameIReg(4,index_r), 1<<scale);
            *len = 6;
            return
               disAMode_copy2tmp(
               handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Shl32, getIReg(4,index_r), mkU8(scale)),
                        mkU32(d))));
         }

         /* base only; %esp encoding means "no index" */
         if (index_r == R_ESP && base_r != R_EBP) {
            DIS(buf, "%s(%s,,)", sorbTxt(sorb), nameIReg(4,base_r));
            *len = 2;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb, getIReg(4,base_r)));
         }

         /* bare d32; neither base nor index */
         if (index_r == R_ESP && base_r == R_EBP) {
            UInt d = getUDisp32(delta);
            DIS(buf, "%s0x%x(,,)", sorbTxt(sorb), d);
            *len = 6;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb, mkU32(d)));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      /* SIB, with 8-bit displacement.  Special cases:
         -- %esp cannot act as an index value.
            If index_r indicates %esp, zero is used for the index.
         Denoted value is:
            | %index == %ESP
               = d8 + %base
            | %index != %ESP
               = d8 + %base + (%index << scale)
      */
      case 0x0C: {
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         UInt d        = getSDisp8(delta+1);

         if (index_r == R_ESP) {
            DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
                      (Int)d, nameIReg(4,base_r));
            *len = 3;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb,
                      binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
         } else {
            DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
                      nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 3;
            return
                disAMode_copy2tmp(
                handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Add32,
                              getIReg(4,base_r),
                              binop(Iop_Shl32,
                                    getIReg(4,index_r), mkU8(scale))),
                        mkU32(d))));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      /* SIB, with 32-bit displacement.  Special cases:
         -- %esp cannot act as an index value.
            If index_r indicates %esp, zero is used for the index.
         Denoted value is:
            | %index == %ESP
               = d32 + %base
            | %index != %ESP
               = d32 + %base + (%index << scale)
      */
      case 0x14: {
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         UInt d        = getUDisp32(delta+1);

         if (index_r == R_ESP) {
            DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
                      (Int)d, nameIReg(4,base_r));
            *len = 6;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb,
                      binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
         } else {
            DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
                      nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 6;
            return
                disAMode_copy2tmp(
                handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Add32,
                              getIReg(4,base_r),
                              binop(Iop_Shl32,
                                    getIReg(4,index_r), mkU8(scale))),
                        mkU32(d))));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      default:
         vpanic("disAMode(x86)");
         return 0; /*notreached*/
   }
}
1777
/* 16-bit-address-size counterpart of disAMode32.  Only the cases
   needed so far are implemented; the rest panic with a TODO. */
static
IRTemp disAMode16 ( Int* len, UChar sorb, Int delta, HChar* buf )
{
   UChar mod_reg_rm = getIByte(delta);
   delta++;

   buf[0] = (UChar)0;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                     /* is now XX000YYY */
   mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                           /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                     /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* mod == 00, rm == 0..3: [BX+SI] etc -- not yet handled. */
      case 0x00: case 0x01: case 0x02: case 0x03:
         vpanic("TODO disAMode16 1");
         break;

      /* mod == 00, rm == 4,5,7: register-indirect, no displacement.
         NOTE(review): no DIS() text is emitted here, so buf stays
         empty for this case -- confirm that is intended. */
      case 0x04: case 0x05: case 0x07:
         { UChar rm = mod_reg_rm;
           *len = 1;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb, getIReg(2,rm)));
         }

      /* mod == 01, rm == 0..3: base-pair + disp8 -- not yet handled. */
      case 0x08: case 0x09: case 0x0a: case 0x0b:
         vpanic("TODO disAMode16 2");
         break;

      /* mod == 01, rm == 4..7: register + 8-bit displacement. */
      case 0x0C: case 0x0D: case 0x0E: case 0x0F:
         { UChar rm = toUChar(mod_reg_rm & 7);
           UInt d = getSDisp8(delta);
           DIS(buf, "%s%d(%s)", sorbTxt(sorb), (Int)d, nameIReg(2,rm));
           *len = 2;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb,
                     binop(Iop_Add16,getIReg(2,rm),mkU16(d))));
         }

      /* mod == 10, rm == 4..7: register + 16-bit displacement. */
      case 0x14: case 0x15: case 0x16: case 0x17:
         { UChar rm = toUChar(mod_reg_rm & 7);
           UInt d = getUDisp16(delta);
           DIS(buf, "%s0x%x(%s)", sorbTxt(sorb), (Int)d, nameIReg(2,rm));
           *len = 3;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb,
                     binop(Iop_Add16,getIReg(2,rm),mkU16(d))));
         }

      /* a plain register -- not an address.  This shouldn't happen. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         vpanic("disAMode(x86): not an addr!");

      /* mod == 00, rm == 6: bare 16-bit literal address. */
      case 0x06:
         { UInt d = getUDisp16(delta);
           *len = 3;
           DIS(buf, "%s(0x%x)", sorbTxt(sorb), d);
           return disAMode_copy2tmp(
                  handleSegOverride(sorb, mkU16(d)));
         }

      /* NOTE(review): mod == 10, rm == 0..3 (base-pair + disp16) is
         not handled and falls through to the panic below. */
      default:
         vpanic("disAMode(x86)");
         return 0; /*notreached*/
   }
}
1849
1850 static
disAMode(Int * len,UChar sorb,Int delta,HChar * buf)1851 IRTemp disAMode ( Int* len, UChar sorb, Int delta, HChar* buf ) {
1852 if (current_sz_addr == 4) {
1853 return disAMode32(len, sorb, delta, buf);
1854 } else {
1855 return disAMode16(len, sorb, delta, buf);
1856 }
1857 }
1858
/* Figure out the number of (insn-stream) bytes constituting the amode
   beginning at delta.  Is useful for getting hold of literals beyond
   the end of the amode before it has been disassembled.  The lengths
   returned here mirror the *len values produced by disAMode32. */

static UInt lengthAMode32 ( Int delta )
{
   UChar mod_reg_rm = getIByte(delta); delta++;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                     /* is now XX000YYY */
   mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                           /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                     /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* (%eax) .. (%edi), not including (%esp) or (%ebp). */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         return 1;

      /* d8(%eax) ... d8(%edi), not including d8(%esp). */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         return 2;

      /* d32(%eax) ... d32(%edi), not including d32(%esp). */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         return 5;

      /* a register, %eax .. %edi.  (Not an addr, but still handled.) */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         return 1;

      /* a 32-bit literal address. */
      case 0x05: return 5;

      /* SIB, no displacement -- unless base is EBP, which forces a
         following d32. */
      case 0x04: {
         UChar sib    = getIByte(delta);
         UChar base_r = toUChar(sib & 7);
         if (base_r == R_EBP) return 6; else return 2;
      }
      /* SIB, with 8-bit displacement. */
      case 0x0C: return 3;

      /* SIB, with 32-bit displacement. */
      case 0x14: return 6;

      default:
         vpanic("lengthAMode");
         return 0; /*notreached*/
   }
}
1916
/* As lengthAMode32, but for 16-bit amodes: returns the total number
   of insn-stream bytes (modrm + displacement) the amode beginning at
   delta occupies.  After the squeeze below, the switch value is
   000XXYYY with mod in bits 4:3 and rm in bits 2:0. */
static UInt lengthAMode16 ( Int delta )
{
   UChar mod_reg_rm = getIByte(delta); delta++;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7; /* is now XX000YYY */
   mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
   /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F; /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* mod=00 rm=100,101,111 ([SI],[DI],[BX]) and mod=11 (register
         operands): modrm byte only. */
      case 0x04: case 0x05: case 0x07:
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         return 1;
      /* mod=00 rm=000..011 ([BX+SI] etc.) and rm=110.
         NOTE(review): standard 16-bit ModR/M would give 1 byte for
         rm=000..011 and 3 bytes (modrm + d16) for rm=110; the value
         2 returned here differs -- confirm it matches what
         disAMode16 actually consumes. */
      case 0x00: case 0x01: case 0x02: case 0x03: case 0x06:
         return 2;
      /* mod=01 (d8) all rm, and mod=10 (d16) rm=100..111.
         NOTE(review): standard lengths would be 2 for mod=01 and
         3 for mod=10 -- see the note above; verify against
         disAMode16. */
      case 0x08: case 0x09: case 0x0a: case 0x0b:
      case 0x0c: case 0x0d: case 0x0e: case 0x0f:
      case 0x14: case 0x15: case 0x16: case 0x17:
         return 3;
      /* mod=10 (d16) rm=000..011.
         NOTE(review): standard length would be 3, not 4. */
      case 0x10: case 0x11: case 0x12: case 0x13:
         return 4;
      default:
         vpanic("lengthAMode16");
         return 0; /*notreached*/
   }
}
1947
lengthAMode(Int delta)1948 static UInt lengthAMode ( Int delta )
1949 {
1950 if (protected_mode) {
1951 return lengthAMode32(delta);
1952 } else {
1953 return lengthAMode16(delta);
1954 }
1955 }
1956
1957 /*------------------------------------------------------------*/
1958 /*--- Disassembling common idioms ---*/
1959 /*------------------------------------------------------------*/
1960
1961 /* Handle binary integer instructions of the form
1962 op E, G meaning
1963 op reg-or-mem, reg
1964 Is passed the a ptr to the modRM byte, the actual operation, and the
1965 data size. Returns the address advanced completely over this
1966 instruction.
1967
1968 E(src) is reg-or-mem
1969 G(dst) is reg.
1970
1971 If E is reg, --> GET %G, tmp
1972 OP %E, tmp
1973 PUT tmp, %G
1974
1975 If E is mem and OP is not reversible,
1976 --> (getAddr E) -> tmpa
1977 LD (tmpa), tmpa
1978 GET %G, tmp2
1979 OP tmpa, tmp2
1980 PUT tmp2, %G
1981
1982 If E is mem and OP is reversible
1983 --> (getAddr E) -> tmpa
1984 LD (tmpa), tmpa
1985 OP %G, tmpa
1986 PUT tmpa, %G
1987 */
/* op E,G: E (reg-or-mem) is the source, G (reg) is the destination;
   see the block comment above for the IR shapes generated.  Returns
   delta advanced past the instruction. */
static
UInt dis_op2_E_G ( UChar sorb,
                   Bool addSubCarry,   /* True only for ADC/SBB */
                   IROp op8,           /* 8-bit form of the operation */
                   Bool keep,          /* False: set flags, discard result */
                   Int size,
                   Int delta0,         /* points at the modRM byte */
                   const HChar* t_x86opc )
{
   HChar dis_buf[50];
   Int len;
   IRType ty = szToITy(size);
   IRTemp dst1 = newTemp(ty);   /* the result */
   IRTemp src = newTemp(ty);    /* the E operand */
   IRTemp dst0 = newTemp(ty);   /* original value of G */
   UChar rm = getUChar(delta0);
   IRTemp addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency.  Ditto SBB reg,reg. */
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && gregOfRM(rm) == eregOfRM(rm)) {
         /* Zero the register first; the getIReg calls below then
            read the just-written zero, so the IR carries no
            dependency on the old register value. */
         putIReg(size, gregOfRM(rm), mkU(ty,0));
      }
      assign( dst0, getIReg(size,gregOfRM(rm)) );
      assign( src, getIReg(size,eregOfRM(rm)) );

      if (addSubCarry && op8 == Iop_Add8) {
         /* ADC: helper computes the result and the flags thunk. */
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         /* SBB: ditto. */
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         /* add/sub flag thunks depend on both operands; logic ops
            only on the result. */
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIReg(size, gregOfRM(rm), mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
                          nameIReg(size,eregOfRM(rm)),
                          nameIReg(size,gregOfRM(rm)));
      return 1+delta0;
   } else {
      /* E refers to memory */
      addr = disAMode ( &len, sorb, delta0, dis_buf);
      assign( dst0, getIReg(size,gregOfRM(rm)) );
      assign( src, loadLE(szToITy(size), mkexpr(addr)) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIReg(size, gregOfRM(rm), mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
                          dis_buf,nameIReg(size,gregOfRM(rm)));
      return len+delta0;
   }
}
2078
2079
2080
2081 /* Handle binary integer instructions of the form
2082 op G, E meaning
2083 op reg, reg-or-mem
2084 Is passed the a ptr to the modRM byte, the actual operation, and the
2085 data size. Returns the address advanced completely over this
2086 instruction.
2087
2088 G(src) is reg.
2089 E(dst) is reg-or-mem
2090
2091 If E is reg, --> GET %E, tmp
2092 OP %G, tmp
2093 PUT tmp, %E
2094
2095 If E is mem, --> (getAddr E) -> tmpa
2096 LD (tmpa), tmpv
2097 OP %G, tmpv
2098 ST tmpv, (tmpa)
2099 */
/* op G,E: G (reg) is the source, E (reg-or-mem) is the destination;
   see the block comment above for the IR shapes generated.  If
   `locked` and E is memory, the write-back is done as a
   compare-and-swap against the originally loaded value, restarting
   the instruction (at guest_EIP_curr_instr) if it fails.  Returns
   delta advanced past the instruction. */
static
UInt dis_op2_G_E ( UChar sorb,
                   Bool locked,        /* LOCK prefix present */
                   Bool addSubCarry,   /* True only for ADC/SBB */
                   IROp op8,           /* 8-bit form of the operation */
                   Bool keep,          /* False: set flags, discard result */
                   Int size,
                   Int delta0,         /* points at the modRM byte */
                   const HChar* t_x86opc )
{
   HChar dis_buf[50];
   Int len;
   IRType ty = szToITy(size);
   IRTemp dst1 = newTemp(ty);   /* the result */
   IRTemp src = newTemp(ty);    /* the G operand */
   IRTemp dst0 = newTemp(ty);   /* original value of E */
   UChar rm = getIByte(delta0);
   IRTemp addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency.  Ditto SBB reg,reg.*/
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && gregOfRM(rm) == eregOfRM(rm)) {
         /* Zero the register first so the getIReg calls below read
            the just-written zero, breaking the bogus dependency. */
         putIReg(size, eregOfRM(rm), mkU(ty,0));
      }
      assign(dst0, getIReg(size,eregOfRM(rm)));
      assign(src, getIReg(size,gregOfRM(rm)));

      if (addSubCarry && op8 == Iop_Add8) {
         /* ADC: helper computes the result and the flags thunk. */
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, eregOfRM(rm), mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         /* SBB: ditto. */
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, eregOfRM(rm), mkexpr(dst1));
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIReg(size, eregOfRM(rm), mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
                          nameIReg(size,gregOfRM(rm)),
                          nameIReg(size,eregOfRM(rm)));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      addr = disAMode ( &len, sorb, delta0, dis_buf);
      assign(dst0, loadLE(ty,mkexpr(addr)));
      assign(src, getIReg(size,gregOfRM(rm)));

      if (addSubCarry && op8 == Iop_Add8) {
         if (locked) {
            /* cas-style store */
            helper_ADC( size, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
         } else {
            /* normal store */
            helper_ADC( size, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         if (locked) {
            /* cas-style store */
            helper_SBB( size, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
         } else {
            /* normal store */
            helper_SBB( size, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (keep) {
            if (locked) {
               if (0) vex_printf("locked case\n" );
               /* Expect the location still to hold dst0; otherwise
                  the CAS fails and the insn is restarted. */
               casLE( mkexpr(addr),
                      mkexpr(dst0)/*expval*/,
                      mkexpr(dst1)/*newval*/, guest_EIP_curr_instr );
            } else {
               if (0) vex_printf("nonlocked case\n");
               storeLE(mkexpr(addr), mkexpr(dst1));
            }
         }
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
                          nameIReg(size,gregOfRM(rm)), dis_buf);
      return len+delta0;
   }
}
2214
2215
2216 /* Handle move instructions of the form
2217 mov E, G meaning
2218 mov reg-or-mem, reg
2219 Is passed the a ptr to the modRM byte, and the data size. Returns
2220 the address advanced completely over this instruction.
2221
2222 E(src) is reg-or-mem
2223 G(dst) is reg.
2224
2225 If E is reg, --> GET %E, tmpv
2226 PUT tmpv, %G
2227
2228 If E is mem --> (getAddr E) -> tmpa
2229 LD (tmpa), tmpb
2230 PUT tmpb, %G
2231 */
2232 static
dis_mov_E_G(UChar sorb,Int size,Int delta0)2233 UInt dis_mov_E_G ( UChar sorb,
2234 Int size,
2235 Int delta0 )
2236 {
2237 Int len;
2238 UChar rm = getIByte(delta0);
2239 HChar dis_buf[50];
2240
2241 if (epartIsReg(rm)) {
2242 putIReg(size, gregOfRM(rm), getIReg(size, eregOfRM(rm)));
2243 DIP("mov%c %s,%s\n", nameISize(size),
2244 nameIReg(size,eregOfRM(rm)),
2245 nameIReg(size,gregOfRM(rm)));
2246 return 1+delta0;
2247 }
2248
2249 /* E refers to memory */
2250 {
2251 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
2252 putIReg(size, gregOfRM(rm), loadLE(szToITy(size), mkexpr(addr)));
2253 DIP("mov%c %s,%s\n", nameISize(size),
2254 dis_buf,nameIReg(size,gregOfRM(rm)));
2255 return delta0+len;
2256 }
2257 }
2258
2259
2260 /* Handle move instructions of the form
2261 mov G, E meaning
2262 mov reg, reg-or-mem
2263 Is passed the a ptr to the modRM byte, and the data size. Returns
2264 the address advanced completely over this instruction.
2265
2266 G(src) is reg.
2267 E(dst) is reg-or-mem
2268
2269 If E is reg, --> GET %G, tmp
2270 PUT tmp, %E
2271
2272 If E is mem, --> (getAddr E) -> tmpa
2273 GET %G, tmpv
2274 ST tmpv, (tmpa)
2275 */
2276 static
dis_mov_G_E(UChar sorb,Int size,Int delta0)2277 UInt dis_mov_G_E ( UChar sorb,
2278 Int size,
2279 Int delta0 )
2280 {
2281 Int len;
2282 UChar rm = getIByte(delta0);
2283 HChar dis_buf[50];
2284
2285 if (epartIsReg(rm)) {
2286 putIReg(size, eregOfRM(rm), getIReg(size, gregOfRM(rm)));
2287 DIP("mov%c %s,%s\n", nameISize(size),
2288 nameIReg(size,gregOfRM(rm)),
2289 nameIReg(size,eregOfRM(rm)));
2290 return 1+delta0;
2291 }
2292
2293 /* E refers to memory */
2294 {
2295 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf);
2296 storeLE( mkexpr(addr), getIReg(size, gregOfRM(rm)) );
2297 DIP("mov%c %s,%s\n", nameISize(size),
2298 nameIReg(size,gregOfRM(rm)), dis_buf);
2299 return len+delta0;
2300 }
2301 }
2302
2303
2304 /* op $immediate, AL/AX/EAX. */
/* op $immediate, AL/AX/EAX.  On entry delta points at the immediate,
   which is `size` bytes wide.  keep==False computes the flags but
   discards the result (compare-style forms). */
static
UInt dis_op_imm_A ( Int size,
                    Bool carrying,    /* True only for ADC/SBB */
                    IROp op8,         /* 8-bit form of the operation */
                    Bool keep,
                    Int delta,
                    const HChar* t_x86opc )
{
   IRType ty = szToITy(size);
   IRTemp dst0 = newTemp(ty);   /* old accumulator value */
   IRTemp src = newTemp(ty);    /* the immediate */
   IRTemp dst1 = newTemp(ty);   /* the result */
   UInt lit = getUDisp(size,delta);
   assign(dst0, getIReg(size,R_EAX));
   assign(src, mkU(ty,lit));

   if (isAddSub(op8) && !carrying) {
      assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
      /* add/sub flag thunks depend on both operands. */
      setFlags_DEP1_DEP2(op8, dst0, src, ty);
   }
   else
   if (isLogic(op8)) {
      vassert(!carrying);
      assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
      /* logic-op flag thunks depend only on the result. */
      setFlags_DEP1(op8, dst1, ty);
   }
   else
   if (op8 == Iop_Add8 && carrying) {
      /* ADC: helper computes both the result and the flags thunk. */
      helper_ADC( size, dst1, dst0, src,
                  /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
   }
   else
   if (op8 == Iop_Sub8 && carrying) {
      /* SBB: ditto. */
      helper_SBB( size, dst1, dst0, src,
                  /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
   }
   else
      vpanic("dis_op_imm_A(x86,guest)");

   if (keep)
      putIReg(size, R_EAX, mkexpr(dst1));

   DIP("%s%c $0x%x, %s\n", t_x86opc, nameISize(size),
                           lit, nameIReg(size,R_EAX));
   return delta+size;   /* skip over the `size`-byte immediate */
}
2351
2352
2353 /* Sign- and Zero-extending moves. */
2354 static
dis_movx_E_G(UChar sorb,Int delta,Int szs,Int szd,Bool sign_extend)2355 UInt dis_movx_E_G ( UChar sorb,
2356 Int delta, Int szs, Int szd, Bool sign_extend )
2357 {
2358 UChar rm = getIByte(delta);
2359 if (epartIsReg(rm)) {
2360 if (szd == szs) {
2361 // mutant case. See #250799
2362 putIReg(szd, gregOfRM(rm),
2363 getIReg(szs,eregOfRM(rm)));
2364 } else {
2365 // normal case
2366 putIReg(szd, gregOfRM(rm),
2367 unop(mkWidenOp(szs,szd,sign_extend),
2368 getIReg(szs,eregOfRM(rm))));
2369 }
2370 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
2371 nameISize(szs), nameISize(szd),
2372 nameIReg(szs,eregOfRM(rm)),
2373 nameIReg(szd,gregOfRM(rm)));
2374 return 1+delta;
2375 }
2376
2377 /* E refers to memory */
2378 {
2379 Int len;
2380 HChar dis_buf[50];
2381 IRTemp addr = disAMode ( &len, sorb, delta, dis_buf );
2382 if (szd == szs) {
2383 // mutant case. See #250799
2384 putIReg(szd, gregOfRM(rm),
2385 loadLE(szToITy(szs),mkexpr(addr)));
2386 } else {
2387 // normal case
2388 putIReg(szd, gregOfRM(rm),
2389 unop(mkWidenOp(szs,szd,sign_extend),
2390 loadLE(szToITy(szs),mkexpr(addr))));
2391 }
2392 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
2393 nameISize(szs), nameISize(szd),
2394 dis_buf, nameIReg(szd,gregOfRM(rm)));
2395 return len+delta;
2396 }
2397 }
2398
2399
2400 /* Generate code to divide ArchRegs EDX:EAX / DX:AX / AX by the 32 /
2401 16 / 8 bit quantity in the given IRTemp. */
/* Generate code to divide ArchRegs EDX:EAX / DX:AX / AX by the 32 /
   16 / 8 bit quantity in the given IRTemp.  The quotient lands in
   EAX/AX/AL and the remainder in EDX/DX/AH (DivMod ops produce
   quotient in the low half, remainder in the high half).  No
   divide-by-zero or overflow checks are emitted here. */
static
void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
{
   IROp op = signed_divide ? Iop_DivModS64to32 : Iop_DivModU64to32;
   IRTemp src64 = newTemp(Ity_I64);   /* the dividend, widened to 64 bits */
   IRTemp dst64 = newTemp(Ity_I64);   /* quotient (lo) : remainder (hi) */
   switch (sz) {
      case 4:
         assign( src64, binop(Iop_32HLto64,
                              getIReg(4,R_EDX), getIReg(4,R_EAX)) );
         assign( dst64, binop(op, mkexpr(src64), mkexpr(t)) );
         putIReg( 4, R_EAX, unop(Iop_64to32,mkexpr(dst64)) );     /* quotient */
         putIReg( 4, R_EDX, unop(Iop_64HIto32,mkexpr(dst64)) );   /* remainder */
         break;
      case 2: {
         /* Widen DX:AX to 64 bits and the divisor to 32 bits, divide,
            then narrow each half back to 16 bits. */
         IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
         IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
         assign( src64, unop(widen3264,
                             binop(Iop_16HLto32,
                                   getIReg(2,R_EDX), getIReg(2,R_EAX))) );
         assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
         putIReg( 2, R_EAX, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
         putIReg( 2, R_EDX, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
         break;
      }
      case 1: {
         /* AX / 8-bit divisor: widen AX to 64 bits and the divisor
            (via 16 then 32 bits) to 32 bits; quotient to AL,
            remainder to AH. */
         IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
         IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
         IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16;
         assign( src64, unop(widen3264, unop(widen1632, getIReg(2,R_EAX))) );
         assign( dst64,
                 binop(op, mkexpr(src64),
                           unop(widen1632, unop(widen816, mkexpr(t)))) );
         putIReg( 1, R_AL, unop(Iop_16to8, unop(Iop_32to16,
                           unop(Iop_64to32,mkexpr(dst64)))) );
         putIReg( 1, R_AH, unop(Iop_16to8, unop(Iop_32to16,
                           unop(Iop_64HIto32,mkexpr(dst64)))) );
         break;
      }
      default: vpanic("codegen_div(x86)");
   }
}
2444
2445
/* Group 1 extended opcodes: ALU op with an immediate (d32, already
   fetched by the caller).  The reg field of modrm selects the op:
   0=ADD 1=OR 2=ADC 3=SBB 4=AND 5=SUB 6=XOR 7=CMP.  am_sz/d_sz are
   the byte sizes of the amode and the immediate; delta points at the
   modrm byte.  Returns delta advanced past the instruction. */
static
UInt dis_Grp1 ( UChar sorb, Bool locked,
                Int delta, UChar modrm,
                Int am_sz, Int d_sz, Int sz, UInt d32 )
{
   Int len;
   HChar dis_buf[50];
   IRType ty = szToITy(sz);
   IRTemp dst1 = newTemp(ty);   /* the result */
   IRTemp src = newTemp(ty);    /* the (masked) immediate */
   IRTemp dst0 = newTemp(ty);   /* original value of E */
   IRTemp addr = IRTemp_INVALID;
   IROp op8 = Iop_INVALID;
   UInt mask = sz==1 ? 0xFF : (sz==2 ? 0xFFFF : 0xFFFFFFFF);

   /* op8 stays Iop_INVALID for ADC (2) and SBB (3); those are
      handled entirely by helper_ADC/helper_SBB below.  Note CMP (7)
      uses Iop_Sub8 like SUB; only the write-back differs. */
   switch (gregOfRM(modrm)) {
      case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break;
      case 2: break; // ADC
      case 3: break; // SBB
      case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break;
      case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break;
      /*NOTREACHED*/
      default: vpanic("dis_Grp1: unhandled case");
   }

   if (epartIsReg(modrm)) {
      vassert(am_sz == 1);

      assign(dst0, getIReg(sz,eregOfRM(modrm)));
      assign(src, mkU(ty,d32 & mask));

      if (gregOfRM(modrm) == 2 /* ADC */) {
         helper_ADC( sz, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
      } else
      if (gregOfRM(modrm) == 3 /* SBB */) {
         helper_SBB( sz, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      /* CMP (subopcode 7) sets flags only; no write-back. */
      if (gregOfRM(modrm) < 7)
         putIReg(sz, eregOfRM(modrm), mkexpr(dst1));

      delta += (am_sz + d_sz);
      DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz), d32,
                              nameIReg(sz,eregOfRM(modrm)));
   } else {
      addr = disAMode ( &len, sorb, delta, dis_buf);

      assign(dst0, loadLE(ty,mkexpr(addr)));
      assign(src, mkU(ty,d32 & mask));

      if (gregOfRM(modrm) == 2 /* ADC */) {
         if (locked) {
            /* cas-style store */
            helper_ADC( sz, dst1, dst0, src,
                       /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
         } else {
            /* normal store */
            helper_ADC( sz, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else
      if (gregOfRM(modrm) == 3 /* SBB */) {
         if (locked) {
            /* cas-style store */
            helper_SBB( sz, dst1, dst0, src,
                       /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
         } else {
            /* normal store */
            helper_SBB( sz, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         /* CMP (subopcode 7) does not store a result. */
         if (gregOfRM(modrm) < 7) {
            if (locked) {
               /* LOCKed: store via compare-and-swap; the insn is
                  restarted if the location no longer holds dst0. */
               casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
                                    mkexpr(dst1)/*newVal*/,
                                    guest_EIP_curr_instr );
            } else {
               storeLE(mkexpr(addr), mkexpr(dst1));
            }
         }
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      delta += (len+d_sz);
      DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz),
                              d32, dis_buf);
   }
   return delta;
}
2548
2549
2550 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed
2551 expression. */
2552
/* Group 2 extended opcodes: rotates and shifts, selected by the reg
   field of modrm (0=ROL 1=ROR 2=RCL 3=RCR 4=SHL 5=SHR 6=SAL 7=SAR).
   shift_expr must be an 8-bit typed expression; shift_expr_txt, if
   non-NULL, is its printable form for tracing.  *decode_OK is set
   True here unconditionally. */
static
UInt dis_Grp2 ( UChar sorb,
                Int delta, UChar modrm,
                Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
                const HChar* shift_expr_txt, Bool* decode_OK )
{
   /* delta on entry points at the modrm byte. */
   HChar dis_buf[50];
   Int len;
   Bool isShift, isRotate, isRotateC;
   IRType ty = szToITy(sz);
   IRTemp dst0 = newTemp(ty);   /* value to be shifted/rotated */
   IRTemp dst1 = newTemp(ty);   /* the result */
   IRTemp addr = IRTemp_INVALID;

   *decode_OK = True;

   vassert(sz == 1 || sz == 2 || sz == 4);

   /* Put value to shift/rotate in dst0. */
   if (epartIsReg(modrm)) {
      assign(dst0, getIReg(sz, eregOfRM(modrm)));
      delta += (am_sz + d_sz);
   } else {
      addr = disAMode ( &len, sorb, delta, dis_buf);
      assign(dst0, loadLE(ty,mkexpr(addr)));
      delta += len + d_sz;
   }

   /* Classify the subopcode: plain shift (SHL/SHR/SAL/SAR), plain
      rotate (ROL/ROR), or rotate-through-carry (RCL/RCR). */
   isShift = False;
   switch (gregOfRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; }

   isRotate = False;
   switch (gregOfRM(modrm)) { case 0: case 1: isRotate = True; }

   isRotateC = False;
   switch (gregOfRM(modrm)) { case 2: case 3: isRotateC = True; }

   if (!isShift && !isRotate && !isRotateC) {
      /*NOTREACHED*/
      vpanic("dis_Grp2(Reg): unhandled case(x86)");
   }

   if (isRotateC) {
      /* call a helper; these insns are so ridiculous they do not
         deserve better */
      Bool left = toBool(gregOfRM(modrm) == 2);
      IRTemp r64 = newTemp(Ity_I64);
      IRExpr** args
         = mkIRExprVec_4( widenUto32(mkexpr(dst0)), /* thing to rotate */
                          widenUto32(shift_expr),   /* rotate amount */
                          widenUto32(mk_x86g_calculate_eflags_all()),
                          mkU32(sz) );
      assign( r64, mkIRExprCCall(
                      Ity_I64,
                      0/*regparm*/,
                      left ? "x86g_calculate_RCL" : "x86g_calculate_RCR",
                      left ? &x86g_calculate_RCL : &x86g_calculate_RCR,
                      args
                   )
            );
      /* new eflags in hi half r64; new value in lo half r64 */
      assign( dst1, narrowTo(ty, unop(Iop_64to32, mkexpr(r64))) );
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, unop(Iop_64HIto32, mkexpr(r64)) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      /* Set NDEP even though it isn't used.  This makes redundant-PUT
         elimination of previous stores to this field work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
   }

   if (isShift) {

      IRTemp pre32     = newTemp(Ity_I32);
      IRTemp res32     = newTemp(Ity_I32);
      IRTemp res32ss   = newTemp(Ity_I32);  /* shifted one place less;
                                               feeds the flags thunk */
      IRTemp shift_amt = newTemp(Ity_I8);
      IROp   op32;

      switch (gregOfRM(modrm)) {
         case 4: op32 = Iop_Shl32; break;   /* SHL */
         case 5: op32 = Iop_Shr32; break;   /* SHR */
         case 6: op32 = Iop_Shl32; break;   /* SAL is the same as SHL */
         case 7: op32 = Iop_Sar32; break;   /* SAR */
         /*NOTREACHED*/
         default: vpanic("dis_Grp2:shift"); break;
      }

      /* Widen the value to be shifted to 32 bits, do the shift, and
         narrow back down.  This seems surprisingly long-winded, but
         unfortunately the Intel semantics requires that 8/16-bit
         shifts give defined results for shift values all the way up
         to 31, and this seems the simplest way to do it.  It has the
         advantage that the only IR level shifts generated are of 32
         bit values, and the shift amount is guaranteed to be in the
         range 0 .. 31, thereby observing the IR semantics requiring
         all shift values to be in the range 0 .. 2^word_size-1. */

      /* shift_amt = shift_expr & 31, regardless of operation size */
      assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(31)) );

      /* suitably widen the value to be shifted to 32 bits. */
      assign( pre32, op32==Iop_Sar32 ? widenSto32(mkexpr(dst0))
                                     : widenUto32(mkexpr(dst0)) );

      /* res32 = pre32 `shift` shift_amt */
      assign( res32, binop(op32, mkexpr(pre32), mkexpr(shift_amt)) );

      /* res32ss = pre32 `shift` ((shift_amt - 1) & 31) */
      assign( res32ss,
              binop(op32,
                    mkexpr(pre32),
                    binop(Iop_And8,
                          binop(Iop_Sub8,
                                mkexpr(shift_amt), mkU8(1)),
                          mkU8(31))) );

      /* Build the flags thunk. */
      setFlags_DEP1_DEP2_shift(op32, res32, res32ss, ty, shift_amt);

      /* Narrow the result back down. */
      assign( dst1, narrowTo(ty, mkexpr(res32)) );

   } /* if (isShift) */

   else
   if (isRotate) {
      Int    ccOp      = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
      Bool   left      = toBool(gregOfRM(modrm) == 0);
      IRTemp rot_amt   = newTemp(Ity_I8);
      /* NOTE: despite the name, rot_amt32 is Ity_I8; "32" refers to
         it being the amount masked to 0..31. */
      IRTemp rot_amt32 = newTemp(Ity_I8);
      IRTemp oldFlags  = newTemp(Ity_I32);

      /* rot_amt = shift_expr & mask */
      /* By masking the rotate amount thusly, the IR-level Shl/Shr
         expressions never shift beyond the word size and thus remain
         well defined. */
      assign(rot_amt32, binop(Iop_And8, shift_expr, mkU8(31)));

      if (ty == Ity_I32)
         assign(rot_amt, mkexpr(rot_amt32));
      else
         assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt32), mkU8(8*sz-1)));

      if (left) {

         /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
         assign(dst1,
            binop( mkSizedOp(ty,Iop_Or8),
                   binop( mkSizedOp(ty,Iop_Shl8),
                          mkexpr(dst0),
                          mkexpr(rot_amt)
                   ),
                   binop( mkSizedOp(ty,Iop_Shr8),
                          mkexpr(dst0),
                          binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
                   )
            )
         );
         ccOp += X86G_CC_OP_ROLB;

      } else { /* right */

         /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
         assign(dst1,
            binop( mkSizedOp(ty,Iop_Or8),
                   binop( mkSizedOp(ty,Iop_Shr8),
                          mkexpr(dst0),
                          mkexpr(rot_amt)
                   ),
                   binop( mkSizedOp(ty,Iop_Shl8),
                          mkexpr(dst0),
                          binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
                   )
            )
         );
         ccOp += X86G_CC_OP_RORB;

      }

      /* dst1 now holds the rotated value.  Build flag thunk.  We
         need the resulting value for this, and the previous flags.
         Except don't set it if the rotate count is zero. */

      assign(oldFlags, mk_x86g_calculate_eflags_all());

      /* rot_amt32 :: Ity_I8.  We need to convert it to I1. */
      IRTemp rot_amt32b = newTemp(Ity_I1);
      assign(rot_amt32b, binop(Iop_CmpNE8, mkexpr(rot_amt32), mkU8(0)) );

      /* CC_DEP1 is the rotated value.  CC_NDEP is flags before.
         Each field is updated conditionally (ITE on a nonzero
         rotate count), leaving the old thunk in place when the
         count is zero. */
      stmt( IRStmt_Put( OFFB_CC_OP,
                        IRExpr_ITE( mkexpr(rot_amt32b),
                                    mkU32(ccOp),
                                    IRExpr_Get(OFFB_CC_OP,Ity_I32) ) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1,
                        IRExpr_ITE( mkexpr(rot_amt32b),
                                    widenUto32(mkexpr(dst1)),
                                    IRExpr_Get(OFFB_CC_DEP1,Ity_I32) ) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2,
                        IRExpr_ITE( mkexpr(rot_amt32b),
                                    mkU32(0),
                                    IRExpr_Get(OFFB_CC_DEP2,Ity_I32) ) ));
      stmt( IRStmt_Put( OFFB_CC_NDEP,
                        IRExpr_ITE( mkexpr(rot_amt32b),
                                    mkexpr(oldFlags),
                                    IRExpr_Get(OFFB_CC_NDEP,Ity_I32) ) ));
   } /* if (isRotate) */

   /* Save result, and finish up. */
   if (epartIsReg(modrm)) {
      putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
      if (vex_traceflags & VEX_TRACE_FE) {
         vex_printf("%s%c ",
                    nameGrp2(gregOfRM(modrm)), nameISize(sz) );
         if (shift_expr_txt)
            vex_printf("%s", shift_expr_txt);
         else
            ppIRExpr(shift_expr);
         vex_printf(", %s\n", nameIReg(sz,eregOfRM(modrm)));
      }
   } else {
      storeLE(mkexpr(addr), mkexpr(dst1));
      if (vex_traceflags & VEX_TRACE_FE) {
         vex_printf("%s%c ",
                    nameGrp2(gregOfRM(modrm)), nameISize(sz) );
         if (shift_expr_txt)
            vex_printf("%s", shift_expr_txt);
         else
            ppIRExpr(shift_expr);
         vex_printf(", %s\n", dis_buf);
      }
   }
   return delta;
}
2788
2789
2790 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
/* Group 8 extended opcodes (but BT/BTS/BTC/BTR only), immediate-bit
   forms.  The reg field of modrm selects: 4=BT 5=BTS 6=BTR 7=BTC.
   Returns delta advanced past the instruction; sets *decode_OK False
   for sizes/subopcodes not handled here. */
static
UInt dis_Grp8_Imm ( UChar sorb,
                    Bool locked,
                    Int delta, UChar modrm,
                    Int am_sz, Int sz, UInt src_val,
                    Bool* decode_OK )
{
   /* src_val denotes a d8.
      And delta on entry points at the modrm byte. */

   IRType ty = szToITy(sz);
   IRTemp t2 = newTemp(Ity_I32);       /* the operand, widened to 32 bits */
   IRTemp t2m = newTemp(Ity_I32);      /* the modified value (non-BT only) */
   IRTemp t_addr = IRTemp_INVALID;
   HChar dis_buf[50];
   UInt mask;

   /* we're optimists :-) */
   *decode_OK = True;

   /* Limit src_val -- the bit offset -- to something within a word.
      The Intel docs say that literal offsets larger than a word are
      masked in this way. */
   switch (sz) {
      case 2: src_val &= 15; break;
      case 4: src_val &= 31; break;
      default: *decode_OK = False; return delta;
   }

   /* Invent a mask suitable for the operation. */
   switch (gregOfRM(modrm)) {
      case 4: /* BT */ mask = 0; break;
      case 5: /* BTS */ mask = 1 << src_val; break;
      case 6: /* BTR */ mask = ~(1 << src_val); break;
      case 7: /* BTC */ mask = 1 << src_val; break;
         /* If this needs to be extended, probably simplest to make a
            new function to handle the other cases (0 .. 3).  The
            Intel docs do however not indicate any use for 0 .. 3, so
            we don't expect this to happen. */
      default: *decode_OK = False; return delta;
   }

   /* Fetch the value to be tested and modified into t2, which is
      32-bits wide regardless of sz. */
   if (epartIsReg(modrm)) {
      vassert(am_sz == 1);
      assign( t2, widenUto32(getIReg(sz, eregOfRM(modrm))) );
      delta += (am_sz + 1);   /* modrm + the d8 immediate */
      DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz),
                              src_val, nameIReg(sz,eregOfRM(modrm)));
   } else {
      Int len;
      t_addr = disAMode ( &len, sorb, delta, dis_buf);
      delta  += (len+1);      /* amode + the d8 immediate */
      assign( t2, widenUto32(loadLE(ty, mkexpr(t_addr))) );
      DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz),
                              src_val, dis_buf);
   }

   /* Compute the new value into t2m, if non-BT. */
   switch (gregOfRM(modrm)) {
      case 4: /* BT */
         break;
      case 5: /* BTS */
         assign( t2m, binop(Iop_Or32, mkU32(mask), mkexpr(t2)) );
         break;
      case 6: /* BTR */
         assign( t2m, binop(Iop_And32, mkU32(mask), mkexpr(t2)) );
         break;
      case 7: /* BTC */
         assign( t2m, binop(Iop_Xor32, mkU32(mask), mkexpr(t2)) );
         break;
      default:
         /*NOTREACHED*/ /*the previous switch guards this*/
         vassert(0);
   }

   /* Write the result back, if non-BT.  If the CAS fails then we
      side-exit from the trace at this point, and so the flag state is
      not affected.  This is of course as required. */
   if (gregOfRM(modrm) != 4 /* BT */) {
      if (epartIsReg(modrm)) {
         putIReg(sz, eregOfRM(modrm), narrowTo(ty, mkexpr(t2m)));
      } else {
         if (locked) {
            casLE( mkexpr(t_addr),
                   narrowTo(ty, mkexpr(t2))/*expd*/,
                   narrowTo(ty, mkexpr(t2m))/*new*/,
                   guest_EIP_curr_instr );
         } else {
            storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
         }
      }
   }

   /* Copy relevant bit from t2 into the carry flag. */
   /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop(Iop_And32,
                  binop(Iop_Shr32, mkexpr(t2), mkU8(src_val)),
                  mkU32(1))
       ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

   return delta;
}
2902
2903
2904 /* Signed/unsigned widening multiply. Generate IR to multiply the
2905 value in EAX/AX/AL by the given IRTemp, and park the result in
2906 EDX:EAX/DX:AX/AX.
2907 */
/* Signed/unsigned widening multiply.  Generate IR to multiply the
   value in EAX/AX/AL by the given IRTemp, and park the result in
   EDX:EAX/DX:AX/AX.  Flags are set via setFlags_MUL before the
   result registers are written. */
static void codegen_mulL_A_D ( Int sz, Bool syned,
                               IRTemp tmp, const HChar* tmp_txt )
{
   IRType ty = szToITy(sz);
   IRTemp t1 = newTemp(ty);   /* current accumulator value */

   assign( t1, getIReg(sz, R_EAX) );

   switch (ty) {
      case Ity_I32: {
         IRTemp res64   = newTemp(Ity_I64);
         IRTemp resHi   = newTemp(Ity_I32);
         IRTemp resLo   = newTemp(Ity_I32);
         IROp   mulOp   = syned ? Iop_MullS32 : Iop_MullU32;
         UInt   tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
         setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
         assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
         assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
         assign( resLo, unop(Iop_64to32,mkexpr(res64)));
         putIReg(4, R_EDX, mkexpr(resHi));
         putIReg(4, R_EAX, mkexpr(resLo));
         break;
      }
      case Ity_I16: {
         IRTemp res32   = newTemp(Ity_I32);
         IRTemp resHi   = newTemp(Ity_I16);
         IRTemp resLo   = newTemp(Ity_I16);
         IROp   mulOp   = syned ? Iop_MullS16 : Iop_MullU16;
         UInt   tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
         setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
         assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
         assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
         assign( resLo, unop(Iop_32to16,mkexpr(res32)));
         putIReg(2, R_EDX, mkexpr(resHi));
         putIReg(2, R_EAX, mkexpr(resLo));
         break;
      }
      case Ity_I8: {
         /* 8-bit case: the whole 16-bit product lands in AX, so no
            hi/lo split is needed.  (Previously dead resHi/resLo
            temps were assigned here but never used; removed.) */
         IRTemp res16   = newTemp(Ity_I16);
         IROp   mulOp   = syned ? Iop_MullS8 : Iop_MullU8;
         UInt   tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
         setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
         assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
         putIReg(2, R_EAX, mkexpr(res16));
         break;
      }
      default:
         vpanic("codegen_mulL_A_D(x86)");
   }
   DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
}
2963
2964
2965 /* Group 3 extended opcodes. */
/* Group 3 extended opcodes (sub-opcode in the reg field of the modRM
   byte): TEST, (undefined), NOT, NEG, MUL, IMUL, DIV, IDIV.  'sorb' is
   any segment-override prefix, 'locked' says whether a LOCK prefix was
   seen, 'delta' is the offset of the modRM byte.  Returns the updated
   delta; sets *decode_OK to False on an undecodable encoding. */
static
UInt dis_Grp3 ( UChar sorb, Bool locked, Int sz, Int delta, Bool* decode_OK )
{
   UInt d32;
   UChar modrm;
   HChar dis_buf[50];
   Int len;
   IRTemp addr;
   IRType ty = szToITy(sz);
   IRTemp t1 = newTemp(ty);
   IRTemp dst1, src, dst0;

   *decode_OK = True; /* may change this later */

   modrm = getIByte(delta);

   if (locked && (gregOfRM(modrm) != 2 && gregOfRM(modrm) != 3)) {
      /* LOCK prefix only allowed with not and neg subopcodes */
      *decode_OK = False;
      return delta;
   }

   if (epartIsReg(modrm)) {
      /* Register operand forms. */
      switch (gregOfRM(modrm)) {
         case 0: { /* TEST */
            /* AND register with immediate; only the flags thunk keeps
               the result. */
            delta++; d32 = getUDisp(sz, delta); delta += sz;
            dst1 = newTemp(ty);
            assign(dst1, binop(mkSizedOp(ty,Iop_And8),
                               getIReg(sz,eregOfRM(modrm)),
                               mkU(ty,d32)));
            setFlags_DEP1( Iop_And8, dst1, ty );
            DIP("test%c $0x%x, %s\n", nameISize(sz), d32,
                nameIReg(sz, eregOfRM(modrm)));
            break;
         }
         case 1: /* UNDEFINED */
            /* The Intel docs imply this insn is undefined and binutils
               agrees.  Unfortunately Core 2 will run it (with who
               knows what result?)  sandpile.org reckons it's an alias
               for case 0.  We play safe. */
            *decode_OK = False;
            break;
         case 2: /* NOT */
            delta++;
            putIReg(sz, eregOfRM(modrm),
                    unop(mkSizedOp(ty,Iop_Not8),
                         getIReg(sz, eregOfRM(modrm))));
            DIP("not%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
            break;
         case 3: /* NEG */
            /* Computed as 0 - operand, which also yields the correct
               flags via the Sub8-family thunk. */
            delta++;
            dst0 = newTemp(ty);
            src = newTemp(ty);
            dst1 = newTemp(ty);
            assign(dst0, mkU(ty,0));
            assign(src, getIReg(sz,eregOfRM(modrm)));
            assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), mkexpr(src)));
            setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
            putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
            DIP("neg%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
            break;
         case 4: /* MUL (unsigned widening) */
            delta++;
            src = newTemp(ty);
            assign(src, getIReg(sz,eregOfRM(modrm)));
            codegen_mulL_A_D ( sz, False, src, nameIReg(sz,eregOfRM(modrm)) );
            break;
         case 5: /* IMUL (signed widening) */
            delta++;
            src = newTemp(ty);
            assign(src, getIReg(sz,eregOfRM(modrm)));
            codegen_mulL_A_D ( sz, True, src, nameIReg(sz,eregOfRM(modrm)) );
            break;
         case 6: /* DIV */
            delta++;
            assign( t1, getIReg(sz, eregOfRM(modrm)) );
            codegen_div ( sz, t1, False );
            DIP("div%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
            break;
         case 7: /* IDIV */
            delta++;
            assign( t1, getIReg(sz, eregOfRM(modrm)) );
            codegen_div ( sz, t1, True );
            DIP("idiv%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
            break;
         default:
            /* This can't happen - gregOfRM should return 0 .. 7 only */
            vpanic("Grp3(x86)");
      }
   } else {
      /* Memory operand forms: load once into t1, then dispatch. */
      addr = disAMode ( &len, sorb, delta, dis_buf );
      /* NOTE: this allocates a fresh temp, shadowing the one created at
         the declaration of t1 above; harmless but redundant. */
      t1 = newTemp(ty);
      delta += len;
      assign(t1, loadLE(ty,mkexpr(addr)));
      switch (gregOfRM(modrm)) {
         case 0: { /* TEST */
            d32 = getUDisp(sz, delta); delta += sz;
            dst1 = newTemp(ty);
            assign(dst1, binop(mkSizedOp(ty,Iop_And8),
                               mkexpr(t1), mkU(ty,d32)));
            setFlags_DEP1( Iop_And8, dst1, ty );
            DIP("test%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
            break;
         }
         case 1: /* UNDEFINED */
            /* See comment above on R case */
            *decode_OK = False;
            break;
         case 2: /* NOT */
            dst1 = newTemp(ty);
            assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
            /* Under LOCK, write back via compare-and-swap against the
               originally loaded value. */
            if (locked) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
                      guest_EIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(dst1) );
            }
            DIP("not%c %s\n", nameISize(sz), dis_buf);
            break;
         case 3: /* NEG */
            dst0 = newTemp(ty);
            src = newTemp(ty);
            dst1 = newTemp(ty);
            assign(dst0, mkU(ty,0));
            assign(src, mkexpr(t1));
            assign(dst1, binop(mkSizedOp(ty,Iop_Sub8),
                               mkexpr(dst0), mkexpr(src)));
            if (locked) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
                      guest_EIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(dst1) );
            }
            setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
            DIP("neg%c %s\n", nameISize(sz), dis_buf);
            break;
         case 4: /* MUL */
            codegen_mulL_A_D ( sz, False, t1, dis_buf );
            break;
         case 5: /* IMUL */
            codegen_mulL_A_D ( sz, True, t1, dis_buf );
            break;
         case 6: /* DIV */
            codegen_div ( sz, t1, False );
            DIP("div%c %s\n", nameISize(sz), dis_buf);
            break;
         case 7: /* IDIV */
            codegen_div ( sz, t1, True );
            DIP("idiv%c %s\n", nameISize(sz), dis_buf);
            break;
         default:
            /* This can't happen - gregOfRM should return 0 .. 7 only */
            vpanic("Grp3(x86)");
      }
   }
   return delta;
}
3123
3124
3125 /* Group 4 extended opcodes. */
/* Group 4 extended opcodes: byte-sized INC/DEC, sub-opcode in the reg
   field of the modRM byte.  Returns the updated delta; sets *decode_OK
   to False for sub-opcodes other than 0 (INC) and 1 (DEC), or for a
   LOCK prefix on anything else. */
static
UInt dis_Grp4 ( UChar sorb, Bool locked, Int delta, Bool* decode_OK )
{
   Int alen;
   UChar modrm;
   HChar dis_buf[50];
   IRType ty = Ity_I8;
   IRTemp t1 = newTemp(ty);
   IRTemp t2 = newTemp(ty);

   *decode_OK = True;

   modrm = getIByte(delta);

   if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
      /* LOCK prefix only allowed with inc and dec subopcodes */
      *decode_OK = False;
      return delta;
   }

   if (epartIsReg(modrm)) {
      /* Register form: t1 = old value, t2 = new value. */
      assign(t1, getIReg(1, eregOfRM(modrm)));
      switch (gregOfRM(modrm)) {
         case 0: /* INC */
            assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
            putIReg(1, eregOfRM(modrm), mkexpr(t2));
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
            putIReg(1, eregOfRM(modrm), mkexpr(t2));
            setFlags_INC_DEC( False, t2, ty );
            break;
         default:
            *decode_OK = False;
            return delta;
      }
      delta++;
      DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)),
          nameIReg(1, eregOfRM(modrm)));
   } else {
      /* Memory form: under LOCK, write back via compare-and-swap
         against the originally loaded value. */
      IRTemp addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( t1, loadLE(ty, mkexpr(addr)) );
      switch (gregOfRM(modrm)) {
         case 0: /* INC */
            assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
            if (locked) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
                      guest_EIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(t2) );
            }
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
            if (locked) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
                      guest_EIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(t2) );
            }
            setFlags_INC_DEC( False, t2, ty );
            break;
         default:
            *decode_OK = False;
            return delta;
      }
      delta += alen;
      DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)), dis_buf);
   }
   return delta;
}
3199
3200
3201 /* Group 5 extended opcodes. */
/* Group 5 extended opcodes: INC, DEC, CALL Ev, JMP Ev, PUSH Ev
   (sub-opcode in the reg field of the modRM byte).  CALL/JMP end the
   block: 'dres' is updated and must show Dis_StopHere afterwards.
   Returns the updated delta; sets *decode_OK to False on unsupported
   sub-opcodes or an illegal LOCK prefix. */
static
UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta,
                /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
{
   Int len;
   UChar modrm;
   HChar dis_buf[50];
   IRTemp addr = IRTemp_INVALID;
   IRType ty = szToITy(sz);
   IRTemp t1 = newTemp(ty);
   IRTemp t2 = IRTemp_INVALID;

   *decode_OK = True;

   modrm = getIByte(delta);

   if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
      /* LOCK prefix only allowed with inc and dec subopcodes */
      *decode_OK = False;
      return delta;
   }

   if (epartIsReg(modrm)) {
      /* Register operand forms; t1 = the operand value. */
      assign(t1, getIReg(sz,eregOfRM(modrm)));
      switch (gregOfRM(modrm)) {
         case 0: /* INC */
            vassert(sz == 2 || sz == 4);
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Add8),
                             mkexpr(t1), mkU(ty,1)));
            setFlags_INC_DEC( True, t2, ty );
            putIReg(sz,eregOfRM(modrm),mkexpr(t2));
            break;
         case 1: /* DEC */
            vassert(sz == 2 || sz == 4);
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
                             mkexpr(t1), mkU(ty,1)));
            setFlags_INC_DEC( False, t2, ty );
            putIReg(sz,eregOfRM(modrm),mkexpr(t2));
            break;
         case 2: /* call Ev */
            /* Push the return address (next insn: modRM byte is the
               only byte consumed, hence delta+1), then jump to t1. */
            vassert(sz == 4);
            t2 = newTemp(Ity_I32);
            assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
            putIReg(4, R_ESP, mkexpr(t2));
            storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+1));
            jmp_treg(dres, Ijk_Call, t1);
            vassert(dres->whatNext == Dis_StopHere);
            break;
         case 4: /* jmp Ev */
            /* NOTE(review): this form accepts sz == 2, but the memory
               form below insists sz == 4 -- confirm the asymmetry is
               intended. */
            vassert(sz == 4 || sz == 2);
            jmp_treg(dres, Ijk_Boring, t1);
            vassert(dres->whatNext == Dis_StopHere);
            break;
         case 6: /* PUSH Ev */
            vassert(sz == 4 || sz == 2);
            t2 = newTemp(Ity_I32);
            assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
            putIReg(4, R_ESP, mkexpr(t2) );
            storeLE( mkexpr(t2), mkexpr(t1) );
            break;
         default:
            *decode_OK = False;
            return delta;
      }
      delta++;
      DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
          nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
   } else {
      /* Memory operand forms; t1 = the loaded operand value. */
      addr = disAMode ( &len, sorb, delta, dis_buf );
      assign(t1, loadLE(ty,mkexpr(addr)));
      switch (gregOfRM(modrm)) {
         case 0: /* INC */
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Add8),
                             mkexpr(t1), mkU(ty,1)));
            /* Under LOCK, write back via compare-and-swap. */
            if (locked) {
               casLE( mkexpr(addr),
                      mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
            } else {
               storeLE(mkexpr(addr),mkexpr(t2));
            }
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
                             mkexpr(t1), mkU(ty,1)));
            if (locked) {
               casLE( mkexpr(addr),
                      mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
            } else {
               storeLE(mkexpr(addr),mkexpr(t2));
            }
            setFlags_INC_DEC( False, t2, ty );
            break;
         case 2: /* call Ev */
            /* Push return address (delta+len: modRM + addressing bytes
               consumed), then jump to the loaded target. */
            vassert(sz == 4);
            t2 = newTemp(Ity_I32);
            assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
            putIReg(4, R_ESP, mkexpr(t2));
            storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+len));
            jmp_treg(dres, Ijk_Call, t1);
            vassert(dres->whatNext == Dis_StopHere);
            break;
         case 4: /* JMP Ev */
            vassert(sz == 4);
            jmp_treg(dres, Ijk_Boring, t1);
            vassert(dres->whatNext == Dis_StopHere);
            break;
         case 6: /* PUSH Ev */
            vassert(sz == 4 || sz == 2);
            t2 = newTemp(Ity_I32);
            assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
            putIReg(4, R_ESP, mkexpr(t2) );
            storeLE( mkexpr(t2), mkexpr(t1) );
            break;
         default:
            *decode_OK = False;
            return delta;
      }
      delta += len;
      DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
          nameISize(sz), dis_buf);
   }
   return delta;
}
3330
3331
3332 /*------------------------------------------------------------*/
3333 /*--- Disassembling string ops (including REP prefixes) ---*/
3334 /*------------------------------------------------------------*/
3335
3336 /* Code shared by all the string ops */
3337 static
dis_string_op_increment(Int sz,Int t_inc)3338 void dis_string_op_increment(Int sz, Int t_inc)
3339 {
3340 if (sz == 4 || sz == 2) {
3341 assign( t_inc,
3342 binop(Iop_Shl32, IRExpr_Get( OFFB_DFLAG, Ity_I32 ),
3343 mkU8(sz/2) ) );
3344 } else {
3345 assign( t_inc,
3346 IRExpr_Get( OFFB_DFLAG, Ity_I32 ) );
3347 }
3348 }
3349
3350 static
dis_string_op(void (* dis_OP)(Int,IRTemp),Int sz,const HChar * name,UChar sorb)3351 void dis_string_op( void (*dis_OP)( Int, IRTemp ),
3352 Int sz, const HChar* name, UChar sorb )
3353 {
3354 IRTemp t_inc = newTemp(Ity_I32);
3355 vassert(sorb == 0); /* hmm. so what was the point of passing it in? */
3356 dis_string_op_increment(sz, t_inc);
3357 dis_OP( sz, t_inc );
3358 DIP("%s%c\n", name, nameISize(sz));
3359 }
3360
3361 static
dis_MOVS(Int sz,IRTemp t_inc)3362 void dis_MOVS ( Int sz, IRTemp t_inc )
3363 {
3364 IRType ty = szToITy(sz);
3365 IRTemp td = newTemp(Ity_I32); /* EDI */
3366 IRTemp ts = newTemp(Ity_I32); /* ESI */
3367
3368 assign( td, getIReg(4, R_EDI) );
3369 assign( ts, getIReg(4, R_ESI) );
3370
3371 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
3372
3373 putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3374 putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
3375 }
3376
3377 static
dis_LODS(Int sz,IRTemp t_inc)3378 void dis_LODS ( Int sz, IRTemp t_inc )
3379 {
3380 IRType ty = szToITy(sz);
3381 IRTemp ts = newTemp(Ity_I32); /* ESI */
3382
3383 assign( ts, getIReg(4, R_ESI) );
3384
3385 putIReg( sz, R_EAX, loadLE(ty, mkexpr(ts)) );
3386
3387 putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
3388 }
3389
3390 static
dis_STOS(Int sz,IRTemp t_inc)3391 void dis_STOS ( Int sz, IRTemp t_inc )
3392 {
3393 IRType ty = szToITy(sz);
3394 IRTemp ta = newTemp(ty); /* EAX */
3395 IRTemp td = newTemp(Ity_I32); /* EDI */
3396
3397 assign( ta, getIReg(sz, R_EAX) );
3398 assign( td, getIReg(4, R_EDI) );
3399
3400 storeLE( mkexpr(td), mkexpr(ta) );
3401
3402 putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3403 }
3404
3405 static
dis_CMPS(Int sz,IRTemp t_inc)3406 void dis_CMPS ( Int sz, IRTemp t_inc )
3407 {
3408 IRType ty = szToITy(sz);
3409 IRTemp tdv = newTemp(ty); /* (EDI) */
3410 IRTemp tsv = newTemp(ty); /* (ESI) */
3411 IRTemp td = newTemp(Ity_I32); /* EDI */
3412 IRTemp ts = newTemp(Ity_I32); /* ESI */
3413
3414 assign( td, getIReg(4, R_EDI) );
3415 assign( ts, getIReg(4, R_ESI) );
3416
3417 assign( tdv, loadLE(ty,mkexpr(td)) );
3418 assign( tsv, loadLE(ty,mkexpr(ts)) );
3419
3420 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
3421
3422 putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3423 putIReg(4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
3424 }
3425
3426 static
dis_SCAS(Int sz,IRTemp t_inc)3427 void dis_SCAS ( Int sz, IRTemp t_inc )
3428 {
3429 IRType ty = szToITy(sz);
3430 IRTemp ta = newTemp(ty); /* EAX */
3431 IRTemp td = newTemp(Ity_I32); /* EDI */
3432 IRTemp tdv = newTemp(ty); /* (EDI) */
3433
3434 assign( ta, getIReg(sz, R_EAX) );
3435 assign( td, getIReg(4, R_EDI) );
3436
3437 assign( tdv, loadLE(ty,mkexpr(td)) );
3438 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
3439
3440 putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3441 }
3442
3443
3444 /* Wrap the appropriate string op inside a REP/REPE/REPNE.
3445 We assume the insn is the last one in the basic block, and so emit a jump
3446 to the next insn, rather than just falling through. */
static
void dis_REP_op ( /*MOD*/DisResult* dres,
                  X86Condcode cond,
                  void (*dis_OP)(Int, IRTemp),
                  Int sz, Addr32 eip, Addr32 eip_next, const HChar* name )
{
   IRTemp t_inc = newTemp(Ity_I32);
   IRTemp tc = newTemp(Ity_I32); /* ECX */

   assign( tc, getIReg(4,R_ECX) );

   /* If ECX is already zero, skip the whole insn: exit to eip_next
      before doing anything else. */
   stmt( IRStmt_Exit( binop(Iop_CmpEQ32,mkexpr(tc),mkU32(0)),
                      Ijk_Boring,
                      IRConst_U32(eip_next), OFFB_EIP ) );

   /* ECX-- */
   putIReg(4, R_ECX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );

   /* Do exactly one iteration of the underlying string op. */
   dis_string_op_increment(sz, t_inc);
   dis_OP (sz, t_inc);

   if (cond == X86CondAlways) {
      /* Plain REP: unconditionally re-execute this insn (jump back to
         eip), so ECX is re-tested next time around. */
      jmp_lit(dres, Ijk_Boring, eip);
      vassert(dres->whatNext == Dis_StopHere);
   } else {
      /* REPE/REPNE: if 'cond' holds, loop back to eip for another
         iteration; otherwise fall through to eip_next. */
      stmt( IRStmt_Exit( mk_x86g_calculate_condition(cond),
                         Ijk_Boring,
                         IRConst_U32(eip), OFFB_EIP ) );
      jmp_lit(dres, Ijk_Boring, eip_next);
      vassert(dres->whatNext == Dis_StopHere);
   }
   DIP("%s%c\n", name, nameISize(sz));
}
3479
3480
3481 /*------------------------------------------------------------*/
3482 /*--- Arithmetic, etc. ---*/
3483 /*------------------------------------------------------------*/
3484
3485 /* IMUL E, G. Supplied eip points to the modR/M byte. */
static
UInt dis_mul_E_G ( UChar sorb,
                   Int size,
                   Int delta0 )
{
   /* Two-operand IMUL: G := G * E, result truncated to 'size'.
      Returns the updated instruction offset. */
   Int alen;
   HChar dis_buf[50];
   UChar rm = getIByte(delta0);
   IRType ty = szToITy(size);
   IRTemp te = newTemp(ty);      /* the E (reg-or-mem) operand */
   IRTemp tg = newTemp(ty);      /* the G (reg) operand */
   IRTemp resLo = newTemp(ty);   /* truncated product */

   assign( tg, getIReg(size, gregOfRM(rm)) );
   if (epartIsReg(rm)) {
      assign( te, getIReg(size, eregOfRM(rm)) );
   } else {
      IRTemp addr = disAMode( &alen, sorb, delta0, dis_buf );
      assign( te, loadLE(ty,mkexpr(addr)) );
   }

   /* NOTE(review): byte-sized signed-multiply base op passed for all
      sizes; presumably setFlags_MUL widens it per 'ty' -- confirm. */
   setFlags_MUL ( ty, te, tg, X86G_CC_OP_SMULB );

   assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );

   putIReg(size, gregOfRM(rm), mkexpr(resLo) );

   if (epartIsReg(rm)) {
      DIP("imul%c %s, %s\n", nameISize(size),
          nameIReg(size,eregOfRM(rm)),
          nameIReg(size,gregOfRM(rm)));
      /* Only the modRM byte was consumed. */
      return 1+delta0;
   } else {
      DIP("imul%c %s, %s\n", nameISize(size),
          dis_buf, nameIReg(size,gregOfRM(rm)));
      /* modRM plus addressing-mode bytes were consumed. */
      return alen+delta0;
   }
}
3524
3525
3526 /* IMUL I * E -> G. Supplied eip points to the modR/M byte. */
static
UInt dis_imul_I_E_G ( UChar sorb,
                      Int size,
                      Int delta,
                      Int litsize )
{
   /* Three-operand IMUL: G := E * imm, result truncated to 'size'.
      'litsize' is the immediate's encoded size (it may be smaller than
      'size', e.g. the sign-extended imm8 form).  Returns the updated
      instruction offset. */
   Int d32, alen;
   HChar dis_buf[50];
   UChar rm = getIByte(delta);
   IRType ty = szToITy(size);
   IRTemp te = newTemp(ty);      /* the E (reg-or-mem) operand */
   IRTemp tl = newTemp(ty);      /* the immediate operand */
   IRTemp resLo = newTemp(ty);   /* truncated product */

   vassert(size == 1 || size == 2 || size == 4);

   if (epartIsReg(rm)) {
      assign(te, getIReg(size, eregOfRM(rm)));
      delta++;
   } else {
      IRTemp addr = disAMode( &alen, sorb, delta, dis_buf );
      assign(te, loadLE(ty, mkexpr(addr)));
      delta += alen;
   }
   /* Immediate is sign-extended on read ... */
   d32 = getSDisp(litsize,delta);
   delta += litsize;

   /* ... then truncated to the operand size. */
   if (size == 1) d32 &= 0xFF;
   if (size == 2) d32 &= 0xFFFF;

   assign(tl, mkU(ty,d32));

   assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));

   setFlags_MUL ( ty, te, tl, X86G_CC_OP_SMULB );

   putIReg(size, gregOfRM(rm), mkexpr(resLo));

   DIP("imul %d, %s, %s\n", d32,
       ( epartIsReg(rm) ? nameIReg(size,eregOfRM(rm)) : dis_buf ),
       nameIReg(size,gregOfRM(rm)) );
   return delta;
}
3570
3571
3572 /* Generate an IR sequence to do a count-leading-zeroes operation on
3573 the supplied IRTemp, and return a new IRTemp holding the result.
3574 'ty' may be Ity_I16 or Ity_I32 only. In the case where the
3575 argument is zero, return the number of bits in the word (the
3576 natural semantics). */
static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
{
   vassert(ty == Ity_I32 || ty == Ity_I16);

   /* Widen the argument to 32 bits ... */
   IRTemp wide = newTemp(Ity_I32);
   assign(wide, widenUto32( mkexpr(src) ));

   /* ... then left-justify it so Clz32 counts leading zeroes relative
      to the original width of 'ty'. */
   IRTemp justified = newTemp(Ity_I32);
   assign(justified,
          binop(Iop_Shl32, mkexpr(wide),
                mkU8(32 - 8 * sizeofIRType(ty))));

   /* Iop_Clz32 is undefined for a zero input, so special-case zero:
      the count is then the full bit-width of 'ty'. */
   IRTemp count = newTemp(Ity_I32);
   assign(count,
          IRExpr_ITE(
             binop(Iop_CmpEQ32, mkexpr(justified), mkU32(0)),
             mkU32(8 * sizeofIRType(ty)),
             unop(Iop_Clz32, mkexpr(justified))
          ));

   /* Narrow the count back to the requested type. */
   IRTemp res = newTemp(ty);
   assign(res, narrowTo(ty, mkexpr(count)));
   return res;
}
3603
3604
3605 /*------------------------------------------------------------*/
3606 /*--- ---*/
3607 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/
3608 /*--- ---*/
3609 /*------------------------------------------------------------*/
3610
3611 /* --- Helper functions for dealing with the register stack. --- */
3612
3613 /* --- Set the emulation-warning pseudo-register. --- */
3614
/* Write e (an Ity_I32 emulation-note code) to the guest EMNOTE field. */
static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_EMNOTE, e ) );
}
3620
3621 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
3622
/* Constant F64 QNaN; used e.g. by put_ST/get_ST when the addressed
   register's tag says the access is invalid. */
static IRExpr* mkQNaN64 ( void )
{
   /* QNaN is 0 2047 1 0(51times)
      == 0b 11111111111b 1 0(51times)
      == 0x7FF8 0000 0000 0000
      i.e. sign 0, exponent all-ones, top mantissa bit set.
   */
   return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
}
3631
3632 /* --------- Get/put the top-of-stack pointer. --------- */
3633
/* Read the x87 top-of-stack pointer (FTOP) as an Ity_I32. */
static IRExpr* get_ftop ( void )
{
   return IRExpr_Get( OFFB_FTOP, Ity_I32 );
}
3638
/* Write the x87 top-of-stack pointer; e must be an Ity_I32. */
static void put_ftop ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_FTOP, e ) );
}
3644
3645 /* --------- Get/put the C3210 bits. --------- */
3646
/* Read the FPU C3..C0 condition bits (FC3210 field) as an Ity_I32. */
static IRExpr* get_C3210 ( void )
{
   return IRExpr_Get( OFFB_FC3210, Ity_I32 );
}
3651
/* Write the FPU C3..C0 condition bits.  Note: unlike put_ftop, no
   type check is done on e here. */
static void put_C3210 ( IRExpr* e )
{
   stmt( IRStmt_Put( OFFB_FC3210, e ) );
}
3656
3657 /* --------- Get/put the FPU rounding mode. --------- */
/* Read the guest FPROUND field (FPU rounding mode, encoded as per
   IRRoundingMode -- see comment before get_roundingmode below). */
static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
{
   return IRExpr_Get( OFFB_FPROUND, Ity_I32 );
}
3662
/* Write the guest FPROUND field.  No type or range check is done. */
static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
{
   stmt( IRStmt_Put( OFFB_FPROUND, e ) );
}
3667
3668
3669 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
3670 /* Produces a value in 0 .. 3, which is encoded as per the type
3671 IRRoundingMode. Since the guest_FPROUND value is also encoded as
3672 per IRRoundingMode, we merely need to get it and mask it for
3673 safety.
3674 */
/* The guest value is masked to 0..3 purely as a safety measure. */
static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
{
   return binop( Iop_And32, get_fpround(), mkU32(3) );
}
3679
/* Always yields round-to-nearest, ignoring the guest's FPROUND state.
   Used at sites where rounding is deliberately not observed (marked
   XXXROUNDINGFIXME below). */
static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
{
   return mkU32(Irrm_NEAREST);
}
3684
3685
3686 /* --------- Get/set FP register tag bytes. --------- */
3687
3688 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
3689
/* Set the tag byte of stack-relative register i; value must be
   Ity_I8 (0 = Empty, non-0 = NonEmpty).  The PutI is indexed by FTOP,
   so 'i' is relative to the current top of stack. */
static void put_ST_TAG ( Int i, IRExpr* value )
{
   IRRegArray* descr;
   vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
   descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
}
3697
3698 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
3699 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
3700
/* Read the tag byte of stack-relative register i (indexed by FTOP). */
static IRExpr* get_ST_TAG ( Int i )
{
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   return IRExpr_GetI( descr, get_ftop(), i );
}
3706
3707
3708 /* --------- Get/set FP registers. --------- */
3709
3710 /* Given i, and some expression e, emit 'ST(i) = e' and set the
3711 register's tag to indicate the register is full. The previous
3712 state of the register is not checked. */
3713
/* Write value (Ity_F64) into ST(i) without checking its tag first,
   then mark the register full. */
static void put_ST_UNCHECKED ( Int i, IRExpr* value )
{
   IRRegArray* descr;
   vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
   descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
   stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
   /* Mark the register as in-use. */
   put_ST_TAG(i, mkU8(1));
}
3723
3724 /* Given i, and some expression e, emit
3725 ST(i) = is_full(i) ? NaN : e
3726 and set the tag accordingly.
3727 */
3728
static void put_ST ( Int i, IRExpr* value )
{
   /* If ST(i) is already full, storing to it writes a QNaN instead of
      the requested value; otherwise the value goes in.  Either way the
      register ends up tagged full (via put_ST_UNCHECKED). */
   IRExpr* guarded
      = IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
                    mkQNaN64(),   /* tag non-0: already full */
                    value );      /* tag 0: empty */
   put_ST_UNCHECKED( i, guarded );
}
3741
3742
3743 /* Given i, generate an expression yielding 'ST(i)'. */
3744
/* Read ST(i) (Ity_F64) without consulting its tag. */
static IRExpr* get_ST_UNCHECKED ( Int i )
{
   IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
   return IRExpr_GetI( descr, get_ftop(), i );
}
3750
3751
3752 /* Given i, generate an expression yielding
3753 is_full(i) ? ST(i) : NaN
3754 */
3755
get_ST(Int i)3756 static IRExpr* get_ST ( Int i )
3757 {
3758 return
3759 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
3760 /* non-0 means full */
3761 get_ST_UNCHECKED(i),
3762 /* 0 means empty */
3763 mkQNaN64());
3764 }
3765
3766
3767 /* Given i, and some expression e, and a condition cond, generate IR
3768 which has the same effect as put_ST(i,e) when cond is true and has
3769 no effect when cond is false. Given the lack of proper
3770 if-then-else in the IR, this is pretty tricky.
3771 */
3772
/* Conditional put_ST: when 'cond' (an Ity_I1 temp) is true this has
   exactly the effect of put_ST(i, value) -- including storing a QNaN
   if the register was already full -- and when false it leaves both
   the register and its tag untouched. */
static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value )
{
   // new_tag = if cond then FULL else old_tag
   // new_val = if cond then (if old_tag==FULL then NaN else val)
   // else old_val

   IRTemp old_tag = newTemp(Ity_I8);
   assign(old_tag, get_ST_TAG(i));
   IRTemp new_tag = newTemp(Ity_I8);
   assign(new_tag,
          IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag)));

   IRTemp old_val = newTemp(Ity_F64);
   assign(old_val, get_ST_UNCHECKED(i));
   IRTemp new_val = newTemp(Ity_F64);
   assign(new_val,
          IRExpr_ITE(mkexpr(cond),
                     IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)),
                                /* non-0 means full */
                                mkQNaN64(),
                                /* 0 means empty */
                                value),
                     mkexpr(old_val)));

   put_ST_UNCHECKED(i, mkexpr(new_val));
   // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So
   // now set it to new_tag instead.  (Ordering matters: the tag write
   // must come after put_ST_UNCHECKED's unconditional tag write.)
   put_ST_TAG(i, mkexpr(new_tag));
}
3802
3803 /* Adjust FTOP downwards by one register. */
3804
/* FTOP -= 1: the x87 stack grows downwards, so this makes room for a
   new ST(0).  The new slot's tag/value are NOT touched here. */
static void fp_push ( void )
{
   put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
}
3809
3810 /* Adjust FTOP downwards by one register when COND is 1:I1. Else
3811 don't change it. */
3812
/* FTOP -= (cond ? 1 : 0), branch-free: the I1 condition is widened to
   0 or 1 and subtracted. */
static void maybe_fp_push ( IRTemp cond )
{
   put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) );
}
3817
3818 /* Adjust FTOP upwards by one register, and mark the vacated register
3819 as empty. */
3820
/* Tag the current ST(0) as empty, then FTOP += 1.  The order matters:
   the tag write is indexed by the pre-increment FTOP. */
static void fp_pop ( void )
{
   put_ST_TAG(0, mkU8(0));
   put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
}
3826
3827 /* Set the C2 bit of the FPU status register to e[0]. Assumes that
3828 e[31:1] == 0.
3829 */
static void set_C2 ( IRExpr* e )
{
   /* Clear the existing C2 field of C3210, then OR in e shifted into
      the C2 position.  Relies on the caller's guarantee that
      e[31:1] == 0. */
   put_C3210( binop(Iop_Or32,
                    binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2)),
                    binop(Iop_Shl32, e, mkU8(X86G_FC_SHIFT_C2))) );
}
3837
3838 /* Generate code to check that abs(d64) < 2^63 and is finite. This is
3839 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The
3840 test is simple, but the derivation of it is not so simple.
3841
3842 The exponent field for an IEEE754 double is 11 bits. That means it
3843 can take values 0 through 0x7FF. If the exponent has value 0x7FF,
3844 the number is either a NaN or an Infinity and so is not finite.
3845 Furthermore, a finite value of exactly 2^63 is the smallest value
3846 that has exponent value 0x43E. Hence, what we need to do is
3847 extract the exponent, ignoring the sign bit and mantissa, and check
3848 it is < 0x43E, or <= 0x43D.
3849
3850 To make this easily applicable to 32- and 64-bit targets, a
3851 roundabout approach is used. First the number is converted to I64,
3852 then the top 32 bits are taken. Shifting them right by 20 bits
3853 places the sign bit and exponent in the bottom 12 bits. Anding
3854 with 0x7FF gets rid of the sign bit, leaving just the exponent
3855 available for comparison.
3856 */
static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 )
{
   /* Reinterpret the double as raw bits so the exponent field can be
      inspected directly. */
   IRTemp bits = newTemp(Ity_I64);
   assign(bits, unop(Iop_ReinterpF64asI64, mkexpr(d64)) );

   /* Exponent = bits[62:52]: take the top 32 bits, shift right 20, and
      mask off the sign bit with 0x7FF. */
   IRTemp expo = newTemp(Ity_I32);
   assign(expo,
          binop(Iop_And32,
                binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(bits)), mkU8(20)),
                mkU32(0x7FF)));

   /* Exponent <= 0x43D means |d64| < 2^63 and finite (see the big
      comment above this function). */
   IRTemp ok = newTemp(Ity_I1);
   assign(ok,
          binop(Iop_CmpLE32U, mkexpr(expo), mkU32(0x43D)));
   return ok;
}
3871
3872 /* Invent a plausible-looking FPU status word value:
3873 ((ftop & 7) << 11) | (c3210 & 0x4700)
3874 */
static IRExpr* get_FPU_sw ( void )
{
   /* Build a 16-bit status word: FTOP (3 bits) in bits 13:11, and the
      C3/C2/C1/C0 bits from the FC3210 field (mask 0x4700). */
   return
      unop(Iop_32to16,
           binop(Iop_Or32,
                 binop(Iop_Shl32,
                       binop(Iop_And32, get_ftop(), mkU32(7)),
                       mkU8(11)),
                 binop(Iop_And32, get_C3210(), mkU32(0x4700))
           ));
}
3886
3887
3888 /* ------------------------------------------------------- */
3889 /* Given all that stack-mangling junk, we can now go ahead
3890 and describe FP instructions.
3891 */
3892
3893 /* ST(0) = ST(0) `op` mem64/32(addr)
3894 Need to check ST(0)'s tag on read, but not on write.
3895 */
3896 static
fp_do_op_mem_ST_0(IRTemp addr,const HChar * op_txt,HChar * dis_buf,IROp op,Bool dbl)3897 void fp_do_op_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
3898 IROp op, Bool dbl )
3899 {
3900 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
3901 if (dbl) {
3902 put_ST_UNCHECKED(0,
3903 triop( op,
3904 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3905 get_ST(0),
3906 loadLE(Ity_F64,mkexpr(addr))
3907 ));
3908 } else {
3909 put_ST_UNCHECKED(0,
3910 triop( op,
3911 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3912 get_ST(0),
3913 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
3914 ));
3915 }
3916 }
3917
3918
3919 /* ST(0) = mem64/32(addr) `op` ST(0)
3920 Need to check ST(0)'s tag on read, but not on write.
3921 */
3922 static
fp_do_oprev_mem_ST_0(IRTemp addr,const HChar * op_txt,HChar * dis_buf,IROp op,Bool dbl)3923 void fp_do_oprev_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
3924 IROp op, Bool dbl )
3925 {
3926 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
3927 if (dbl) {
3928 put_ST_UNCHECKED(0,
3929 triop( op,
3930 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3931 loadLE(Ity_F64,mkexpr(addr)),
3932 get_ST(0)
3933 ));
3934 } else {
3935 put_ST_UNCHECKED(0,
3936 triop( op,
3937 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3938 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
3939 get_ST(0)
3940 ));
3941 }
3942 }
3943
3944
3945 /* ST(dst) = ST(dst) `op` ST(src).
3946 Check dst and src tags when reading but not on write.
3947 */
3948 static
fp_do_op_ST_ST(const HChar * op_txt,IROp op,UInt st_src,UInt st_dst,Bool pop_after)3949 void fp_do_op_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
3950 Bool pop_after )
3951 {
3952 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"",
3953 st_src, st_dst);
3954 put_ST_UNCHECKED(
3955 st_dst,
3956 triop( op,
3957 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3958 get_ST(st_dst),
3959 get_ST(st_src) )
3960 );
3961 if (pop_after)
3962 fp_pop();
3963 }
3964
3965 /* ST(dst) = ST(src) `op` ST(dst).
3966 Check dst and src tags when reading but not on write.
3967 */
3968 static
fp_do_oprev_ST_ST(const HChar * op_txt,IROp op,UInt st_src,UInt st_dst,Bool pop_after)3969 void fp_do_oprev_ST_ST ( const HChar* op_txt, IROp op, UInt st_src,
3970 UInt st_dst, Bool pop_after )
3971 {
3972 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"",
3973 st_src, st_dst);
3974 put_ST_UNCHECKED(
3975 st_dst,
3976 triop( op,
3977 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3978 get_ST(st_src),
3979 get_ST(st_dst) )
3980 );
3981 if (pop_after)
3982 fp_pop();
3983 }
3984
3985 /* %eflags(Z,P,C) = UCOMI( st(0), st(i) ) */
fp_do_ucomi_ST0_STi(UInt i,Bool pop_after)3986 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
3987 {
3988 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
3989 /* This is a bit of a hack (and isn't really right). It sets
3990 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
3991 documentation implies A and S are unchanged.
3992 */
3993 /* It's also fishy in that it is used both for COMIP and
3994 UCOMIP, and they aren't the same (although similar). */
3995 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
3996 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
3997 stmt( IRStmt_Put( OFFB_CC_DEP1,
3998 binop( Iop_And32,
3999 binop(Iop_CmpF64, get_ST(0), get_ST(i)),
4000 mkU32(0x45)
4001 )));
4002 /* Set NDEP even though it isn't used. This makes redundant-PUT
4003 elimination of previous stores to this field work better. */
4004 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
4005 if (pop_after)
4006 fp_pop();
4007 }
4008
4009
4010 static
dis_FPU(Bool * decode_ok,UChar sorb,Int delta)4011 UInt dis_FPU ( Bool* decode_ok, UChar sorb, Int delta )
4012 {
4013 Int len;
4014 UInt r_src, r_dst;
4015 HChar dis_buf[50];
4016 IRTemp t1, t2;
4017
4018 /* On entry, delta points at the second byte of the insn (the modrm
4019 byte).*/
4020 UChar first_opcode = getIByte(delta-1);
4021 UChar modrm = getIByte(delta+0);
4022
4023 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
4024
4025 if (first_opcode == 0xD8) {
4026 if (modrm < 0xC0) {
4027
4028 /* bits 5,4,3 are an opcode extension, and the modRM also
4029 specifies an address. */
4030 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4031 delta += len;
4032
4033 switch (gregOfRM(modrm)) {
4034
4035 case 0: /* FADD single-real */
4036 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
4037 break;
4038
4039 case 1: /* FMUL single-real */
4040 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
4041 break;
4042
4043 case 2: /* FCOM single-real */
4044 DIP("fcoms %s\n", dis_buf);
4045 /* This forces C1 to zero, which isn't right. */
4046 put_C3210(
4047 binop( Iop_And32,
4048 binop(Iop_Shl32,
4049 binop(Iop_CmpF64,
4050 get_ST(0),
4051 unop(Iop_F32toF64,
4052 loadLE(Ity_F32,mkexpr(addr)))),
4053 mkU8(8)),
4054 mkU32(0x4500)
4055 ));
4056 break;
4057
4058 case 3: /* FCOMP single-real */
4059 DIP("fcomps %s\n", dis_buf);
4060 /* This forces C1 to zero, which isn't right. */
4061 put_C3210(
4062 binop( Iop_And32,
4063 binop(Iop_Shl32,
4064 binop(Iop_CmpF64,
4065 get_ST(0),
4066 unop(Iop_F32toF64,
4067 loadLE(Ity_F32,mkexpr(addr)))),
4068 mkU8(8)),
4069 mkU32(0x4500)
4070 ));
4071 fp_pop();
4072 break;
4073
4074 case 4: /* FSUB single-real */
4075 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
4076 break;
4077
4078 case 5: /* FSUBR single-real */
4079 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
4080 break;
4081
4082 case 6: /* FDIV single-real */
4083 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
4084 break;
4085
4086 case 7: /* FDIVR single-real */
4087 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
4088 break;
4089
4090 default:
4091 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
4092 vex_printf("first_opcode == 0xD8\n");
4093 goto decode_fail;
4094 }
4095 } else {
4096 delta++;
4097 switch (modrm) {
4098
4099 case 0xc0:
4100 case 0xc1:
4101 case 0xc2:
4102 case 0xc3:
4103 case 0xc4:
4104 case 0xc5:
4105 case 0xc6:
4106 case 0xc7: /* FADD %st(?),%st(0) */
4107 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
4108 break;
4109
4110 case 0xc8:
4111 case 0xc9:
4112 case 0xca:
4113 case 0xcb:
4114 case 0xcc:
4115 case 0xcd:
4116 case 0xce:
4117 case 0xcf: /* FMUL %st(?),%st(0) */
4118 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
4119 break;
4120
4121 /* Dunno if this is right */
4122 case 0xd0:
4123 case 0xd1:
4124 case 0xd2:
4125 case 0xd3:
4126 case 0xd4:
4127 case 0xd5:
4128 case 0xd6:
4129 case 0xd7: /* FCOM %st(?),%st(0) */
4130 r_dst = (UInt)modrm - 0xD0;
4131 DIP("fcom %%st(0),%%st(%u)\n", r_dst);
4132 /* This forces C1 to zero, which isn't right. */
4133 put_C3210(
4134 binop( Iop_And32,
4135 binop(Iop_Shl32,
4136 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
4137 mkU8(8)),
4138 mkU32(0x4500)
4139 ));
4140 break;
4141
4142 /* Dunno if this is right */
4143 case 0xd8:
4144 case 0xd9:
4145 case 0xda:
4146 case 0xdb:
4147 case 0xdc:
4148 case 0xdd:
4149 case 0xde:
4150 case 0xdf: /* FCOMP %st(?),%st(0) */
4151 r_dst = (UInt)modrm - 0xD8;
4152 DIP("fcomp %%st(0),%%st(%u)\n", r_dst);
4153 /* This forces C1 to zero, which isn't right. */
4154 put_C3210(
4155 binop( Iop_And32,
4156 binop(Iop_Shl32,
4157 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
4158 mkU8(8)),
4159 mkU32(0x4500)
4160 ));
4161 fp_pop();
4162 break;
4163
4164 case 0xe0:
4165 case 0xe1:
4166 case 0xe2:
4167 case 0xe3:
4168 case 0xe4:
4169 case 0xe5:
4170 case 0xe6:
4171 case 0xe7: /* FSUB %st(?),%st(0) */
4172 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
4173 break;
4174
4175 case 0xe8:
4176 case 0xe9:
4177 case 0xea:
4178 case 0xeb:
4179 case 0xec:
4180 case 0xed:
4181 case 0xee:
4182 case 0xef: /* FSUBR %st(?),%st(0) */
4183 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
4184 break;
4185
4186 case 0xf0:
4187 case 0xf1:
4188 case 0xf2:
4189 case 0xf3:
4190 case 0xf4:
4191 case 0xf5:
4192 case 0xf6:
4193 case 0xf7: /* FDIV %st(?),%st(0) */
4194 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
4195 break;
4196
4197 case 0xf8:
4198 case 0xf9:
4199 case 0xfa:
4200 case 0xfb:
4201 case 0xfc:
4202 case 0xfd:
4203 case 0xfe:
4204 case 0xff: /* FDIVR %st(?),%st(0) */
4205 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
4206 break;
4207
4208 default:
4209 goto decode_fail;
4210 }
4211 }
4212 }
4213
4214 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
4215 else
4216 if (first_opcode == 0xD9) {
4217 if (modrm < 0xC0) {
4218
4219 /* bits 5,4,3 are an opcode extension, and the modRM also
4220 specifies an address. */
4221 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4222 delta += len;
4223
4224 switch (gregOfRM(modrm)) {
4225
4226 case 0: /* FLD single-real */
4227 DIP("flds %s\n", dis_buf);
4228 fp_push();
4229 put_ST(0, unop(Iop_F32toF64,
4230 loadLE(Ity_F32, mkexpr(addr))));
4231 break;
4232
4233 case 2: /* FST single-real */
4234 DIP("fsts %s\n", dis_buf);
4235 storeLE(mkexpr(addr),
4236 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
4237 break;
4238
4239 case 3: /* FSTP single-real */
4240 DIP("fstps %s\n", dis_buf);
4241 storeLE(mkexpr(addr),
4242 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
4243 fp_pop();
4244 break;
4245
4246 case 4: { /* FLDENV m28 */
4247 /* Uses dirty helper:
4248 VexEmNote x86g_do_FLDENV ( VexGuestX86State*, HWord ) */
4249 IRTemp ew = newTemp(Ity_I32);
4250 IRDirty* d = unsafeIRDirty_0_N (
4251 0/*regparms*/,
4252 "x86g_dirtyhelper_FLDENV",
4253 &x86g_dirtyhelper_FLDENV,
4254 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
4255 );
4256 d->tmp = ew;
4257 /* declare we're reading memory */
4258 d->mFx = Ifx_Read;
4259 d->mAddr = mkexpr(addr);
4260 d->mSize = 28;
4261
4262 /* declare we're writing guest state */
4263 d->nFxState = 4;
4264 vex_bzero(&d->fxState, sizeof(d->fxState));
4265
4266 d->fxState[0].fx = Ifx_Write;
4267 d->fxState[0].offset = OFFB_FTOP;
4268 d->fxState[0].size = sizeof(UInt);
4269
4270 d->fxState[1].fx = Ifx_Write;
4271 d->fxState[1].offset = OFFB_FPTAGS;
4272 d->fxState[1].size = 8 * sizeof(UChar);
4273
4274 d->fxState[2].fx = Ifx_Write;
4275 d->fxState[2].offset = OFFB_FPROUND;
4276 d->fxState[2].size = sizeof(UInt);
4277
4278 d->fxState[3].fx = Ifx_Write;
4279 d->fxState[3].offset = OFFB_FC3210;
4280 d->fxState[3].size = sizeof(UInt);
4281
4282 stmt( IRStmt_Dirty(d) );
4283
4284 /* ew contains any emulation warning we may need to
4285 issue. If needed, side-exit to the next insn,
4286 reporting the warning, so that Valgrind's dispatcher
4287 sees the warning. */
4288 put_emwarn( mkexpr(ew) );
4289 stmt(
4290 IRStmt_Exit(
4291 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
4292 Ijk_EmWarn,
4293 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
4294 OFFB_EIP
4295 )
4296 );
4297
4298 DIP("fldenv %s\n", dis_buf);
4299 break;
4300 }
4301
4302 case 5: {/* FLDCW */
4303 /* The only thing we observe in the control word is the
4304 rounding mode. Therefore, pass the 16-bit value
4305 (x87 native-format control word) to a clean helper,
4306 getting back a 64-bit value, the lower half of which
4307 is the FPROUND value to store, and the upper half of
4308 which is the emulation-warning token which may be
4309 generated.
4310 */
4311 /* ULong x86h_check_fldcw ( UInt ); */
4312 IRTemp t64 = newTemp(Ity_I64);
4313 IRTemp ew = newTemp(Ity_I32);
4314 DIP("fldcw %s\n", dis_buf);
4315 assign( t64, mkIRExprCCall(
4316 Ity_I64, 0/*regparms*/,
4317 "x86g_check_fldcw",
4318 &x86g_check_fldcw,
4319 mkIRExprVec_1(
4320 unop( Iop_16Uto32,
4321 loadLE(Ity_I16, mkexpr(addr)))
4322 )
4323 )
4324 );
4325
4326 put_fpround( unop(Iop_64to32, mkexpr(t64)) );
4327 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
4328 put_emwarn( mkexpr(ew) );
4329 /* Finally, if an emulation warning was reported,
4330 side-exit to the next insn, reporting the warning,
4331 so that Valgrind's dispatcher sees the warning. */
4332 stmt(
4333 IRStmt_Exit(
4334 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
4335 Ijk_EmWarn,
4336 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
4337 OFFB_EIP
4338 )
4339 );
4340 break;
4341 }
4342
4343 case 6: { /* FNSTENV m28 */
4344 /* Uses dirty helper:
4345 void x86g_do_FSTENV ( VexGuestX86State*, HWord ) */
4346 IRDirty* d = unsafeIRDirty_0_N (
4347 0/*regparms*/,
4348 "x86g_dirtyhelper_FSTENV",
4349 &x86g_dirtyhelper_FSTENV,
4350 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
4351 );
4352 /* declare we're writing memory */
4353 d->mFx = Ifx_Write;
4354 d->mAddr = mkexpr(addr);
4355 d->mSize = 28;
4356
4357 /* declare we're reading guest state */
4358 d->nFxState = 4;
4359 vex_bzero(&d->fxState, sizeof(d->fxState));
4360
4361 d->fxState[0].fx = Ifx_Read;
4362 d->fxState[0].offset = OFFB_FTOP;
4363 d->fxState[0].size = sizeof(UInt);
4364
4365 d->fxState[1].fx = Ifx_Read;
4366 d->fxState[1].offset = OFFB_FPTAGS;
4367 d->fxState[1].size = 8 * sizeof(UChar);
4368
4369 d->fxState[2].fx = Ifx_Read;
4370 d->fxState[2].offset = OFFB_FPROUND;
4371 d->fxState[2].size = sizeof(UInt);
4372
4373 d->fxState[3].fx = Ifx_Read;
4374 d->fxState[3].offset = OFFB_FC3210;
4375 d->fxState[3].size = sizeof(UInt);
4376
4377 stmt( IRStmt_Dirty(d) );
4378
4379 DIP("fnstenv %s\n", dis_buf);
4380 break;
4381 }
4382
4383 case 7: /* FNSTCW */
4384 /* Fake up a native x87 FPU control word. The only
4385 thing it depends on is FPROUND[1:0], so call a clean
4386 helper to cook it up. */
4387 /* UInt x86h_create_fpucw ( UInt fpround ) */
4388 DIP("fnstcw %s\n", dis_buf);
4389 storeLE(
4390 mkexpr(addr),
4391 unop( Iop_32to16,
4392 mkIRExprCCall(
4393 Ity_I32, 0/*regp*/,
4394 "x86g_create_fpucw", &x86g_create_fpucw,
4395 mkIRExprVec_1( get_fpround() )
4396 )
4397 )
4398 );
4399 break;
4400
4401 default:
4402 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
4403 vex_printf("first_opcode == 0xD9\n");
4404 goto decode_fail;
4405 }
4406
4407 } else {
4408 delta++;
4409 switch (modrm) {
4410
4411 case 0xc0:
4412 case 0xc1:
4413 case 0xc2:
4414 case 0xc3:
4415 case 0xc4:
4416 case 0xc5:
4417 case 0xc6:
4418 case 0xc7: /* FLD %st(?) */
4419 r_src = (UInt)modrm - 0xC0;
4420 DIP("fld %%st(%u)\n", r_src);
4421 t1 = newTemp(Ity_F64);
4422 assign(t1, get_ST(r_src));
4423 fp_push();
4424 put_ST(0, mkexpr(t1));
4425 break;
4426
4427 case 0xc8:
4428 case 0xc9:
4429 case 0xca:
4430 case 0xcb:
4431 case 0xcc:
4432 case 0xcd:
4433 case 0xce:
4434 case 0xcf: /* FXCH %st(?) */
4435 r_src = (UInt)modrm - 0xC8;
4436 DIP("fxch %%st(%u)\n", r_src);
4437 t1 = newTemp(Ity_F64);
4438 t2 = newTemp(Ity_F64);
4439 assign(t1, get_ST(0));
4440 assign(t2, get_ST(r_src));
4441 put_ST_UNCHECKED(0, mkexpr(t2));
4442 put_ST_UNCHECKED(r_src, mkexpr(t1));
4443 break;
4444
4445 case 0xE0: /* FCHS */
4446 DIP("fchs\n");
4447 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
4448 break;
4449
4450 case 0xE1: /* FABS */
4451 DIP("fabs\n");
4452 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
4453 break;
4454
4455 case 0xE4: /* FTST */
4456 DIP("ftst\n");
4457 /* This forces C1 to zero, which isn't right. */
4458 /* Well, in fact the Intel docs say (bizarrely): "C1 is
4459 set to 0 if stack underflow occurred; otherwise, set
4460 to 0" which is pretty nonsensical. I guess it's a
4461 typo. */
4462 put_C3210(
4463 binop( Iop_And32,
4464 binop(Iop_Shl32,
4465 binop(Iop_CmpF64,
4466 get_ST(0),
4467 IRExpr_Const(IRConst_F64i(0x0ULL))),
4468 mkU8(8)),
4469 mkU32(0x4500)
4470 ));
4471 break;
4472
4473 case 0xE5: { /* FXAM */
4474 /* This is an interesting one. It examines %st(0),
4475 regardless of whether the tag says it's empty or not.
4476 Here, just pass both the tag (in our format) and the
4477 value (as a double, actually a ULong) to a helper
4478 function. */
4479 IRExpr** args
4480 = mkIRExprVec_2( unop(Iop_8Uto32, get_ST_TAG(0)),
4481 unop(Iop_ReinterpF64asI64,
4482 get_ST_UNCHECKED(0)) );
4483 put_C3210(mkIRExprCCall(
4484 Ity_I32,
4485 0/*regparm*/,
4486 "x86g_calculate_FXAM", &x86g_calculate_FXAM,
4487 args
4488 ));
4489 DIP("fxam\n");
4490 break;
4491 }
4492
4493 case 0xE8: /* FLD1 */
4494 DIP("fld1\n");
4495 fp_push();
4496 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
4497 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
4498 break;
4499
4500 case 0xE9: /* FLDL2T */
4501 DIP("fldl2t\n");
4502 fp_push();
4503 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
4504 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
4505 break;
4506
4507 case 0xEA: /* FLDL2E */
4508 DIP("fldl2e\n");
4509 fp_push();
4510 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
4511 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
4512 break;
4513
4514 case 0xEB: /* FLDPI */
4515 DIP("fldpi\n");
4516 fp_push();
4517 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
4518 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
4519 break;
4520
4521 case 0xEC: /* FLDLG2 */
4522 DIP("fldlg2\n");
4523 fp_push();
4524 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
4525 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
4526 break;
4527
4528 case 0xED: /* FLDLN2 */
4529 DIP("fldln2\n");
4530 fp_push();
4531 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
4532 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
4533 break;
4534
4535 case 0xEE: /* FLDZ */
4536 DIP("fldz\n");
4537 fp_push();
4538 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
4539 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
4540 break;
4541
4542 case 0xF0: /* F2XM1 */
4543 DIP("f2xm1\n");
4544 put_ST_UNCHECKED(0,
4545 binop(Iop_2xm1F64,
4546 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4547 get_ST(0)));
4548 break;
4549
4550 case 0xF1: /* FYL2X */
4551 DIP("fyl2x\n");
4552 put_ST_UNCHECKED(1,
4553 triop(Iop_Yl2xF64,
4554 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4555 get_ST(1),
4556 get_ST(0)));
4557 fp_pop();
4558 break;
4559
4560 case 0xF2: { /* FPTAN */
4561 DIP("fptan\n");
4562 IRTemp argD = newTemp(Ity_F64);
4563 assign(argD, get_ST(0));
4564 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
4565 IRTemp resD = newTemp(Ity_F64);
4566 assign(resD,
4567 IRExpr_ITE(
4568 mkexpr(argOK),
4569 binop(Iop_TanF64,
4570 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4571 mkexpr(argD)),
4572 mkexpr(argD))
4573 );
4574 put_ST_UNCHECKED(0, mkexpr(resD));
4575 /* Conditionally push 1.0 on the stack, if the arg is
4576 in range */
4577 maybe_fp_push(argOK);
4578 maybe_put_ST(argOK, 0,
4579 IRExpr_Const(IRConst_F64(1.0)));
4580 set_C2( binop(Iop_Xor32,
4581 unop(Iop_1Uto32, mkexpr(argOK)),
4582 mkU32(1)) );
4583 break;
4584 }
4585
4586 case 0xF3: /* FPATAN */
4587 DIP("fpatan\n");
4588 put_ST_UNCHECKED(1,
4589 triop(Iop_AtanF64,
4590 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4591 get_ST(1),
4592 get_ST(0)));
4593 fp_pop();
4594 break;
4595
4596 case 0xF4: { /* FXTRACT */
4597 IRTemp argF = newTemp(Ity_F64);
4598 IRTemp sigF = newTemp(Ity_F64);
4599 IRTemp expF = newTemp(Ity_F64);
4600 IRTemp argI = newTemp(Ity_I64);
4601 IRTemp sigI = newTemp(Ity_I64);
4602 IRTemp expI = newTemp(Ity_I64);
4603 DIP("fxtract\n");
4604 assign( argF, get_ST(0) );
4605 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
4606 assign( sigI,
4607 mkIRExprCCall(
4608 Ity_I64, 0/*regparms*/,
4609 "x86amd64g_calculate_FXTRACT",
4610 &x86amd64g_calculate_FXTRACT,
4611 mkIRExprVec_2( mkexpr(argI),
4612 mkIRExpr_HWord(0)/*sig*/ ))
4613 );
4614 assign( expI,
4615 mkIRExprCCall(
4616 Ity_I64, 0/*regparms*/,
4617 "x86amd64g_calculate_FXTRACT",
4618 &x86amd64g_calculate_FXTRACT,
4619 mkIRExprVec_2( mkexpr(argI),
4620 mkIRExpr_HWord(1)/*exp*/ ))
4621 );
4622 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
4623 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
4624 /* exponent */
4625 put_ST_UNCHECKED(0, mkexpr(expF) );
4626 fp_push();
4627 /* significand */
4628 put_ST(0, mkexpr(sigF) );
4629 break;
4630 }
4631
4632 case 0xF5: { /* FPREM1 -- IEEE compliant */
4633 IRTemp a1 = newTemp(Ity_F64);
4634 IRTemp a2 = newTemp(Ity_F64);
4635 DIP("fprem1\n");
4636 /* Do FPREM1 twice, once to get the remainder, and once
4637 to get the C3210 flag values. */
4638 assign( a1, get_ST(0) );
4639 assign( a2, get_ST(1) );
4640 put_ST_UNCHECKED(0,
4641 triop(Iop_PRem1F64,
4642 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4643 mkexpr(a1),
4644 mkexpr(a2)));
4645 put_C3210(
4646 triop(Iop_PRem1C3210F64,
4647 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4648 mkexpr(a1),
4649 mkexpr(a2)) );
4650 break;
4651 }
4652
4653 case 0xF7: /* FINCSTP */
4654 DIP("fprem\n");
4655 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
4656 break;
4657
4658 case 0xF8: { /* FPREM -- not IEEE compliant */
4659 IRTemp a1 = newTemp(Ity_F64);
4660 IRTemp a2 = newTemp(Ity_F64);
4661 DIP("fprem\n");
4662 /* Do FPREM twice, once to get the remainder, and once
4663 to get the C3210 flag values. */
4664 assign( a1, get_ST(0) );
4665 assign( a2, get_ST(1) );
4666 put_ST_UNCHECKED(0,
4667 triop(Iop_PRemF64,
4668 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4669 mkexpr(a1),
4670 mkexpr(a2)));
4671 put_C3210(
4672 triop(Iop_PRemC3210F64,
4673 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4674 mkexpr(a1),
4675 mkexpr(a2)) );
4676 break;
4677 }
4678
4679 case 0xF9: /* FYL2XP1 */
4680 DIP("fyl2xp1\n");
4681 put_ST_UNCHECKED(1,
4682 triop(Iop_Yl2xp1F64,
4683 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4684 get_ST(1),
4685 get_ST(0)));
4686 fp_pop();
4687 break;
4688
4689 case 0xFA: /* FSQRT */
4690 DIP("fsqrt\n");
4691 put_ST_UNCHECKED(0,
4692 binop(Iop_SqrtF64,
4693 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4694 get_ST(0)));
4695 break;
4696
4697 case 0xFB: { /* FSINCOS */
4698 DIP("fsincos\n");
4699 IRTemp argD = newTemp(Ity_F64);
4700 assign(argD, get_ST(0));
4701 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
4702 IRTemp resD = newTemp(Ity_F64);
4703 assign(resD,
4704 IRExpr_ITE(
4705 mkexpr(argOK),
4706 binop(Iop_SinF64,
4707 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4708 mkexpr(argD)),
4709 mkexpr(argD))
4710 );
4711 put_ST_UNCHECKED(0, mkexpr(resD));
4712 /* Conditionally push the cos value on the stack, if
4713 the arg is in range */
4714 maybe_fp_push(argOK);
4715 maybe_put_ST(argOK, 0,
4716 binop(Iop_CosF64,
4717 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4718 mkexpr(argD)));
4719 set_C2( binop(Iop_Xor32,
4720 unop(Iop_1Uto32, mkexpr(argOK)),
4721 mkU32(1)) );
4722 break;
4723 }
4724
4725 case 0xFC: /* FRNDINT */
4726 DIP("frndint\n");
4727 put_ST_UNCHECKED(0,
4728 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
4729 break;
4730
4731 case 0xFD: /* FSCALE */
4732 DIP("fscale\n");
4733 put_ST_UNCHECKED(0,
4734 triop(Iop_ScaleF64,
4735 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4736 get_ST(0),
4737 get_ST(1)));
4738 break;
4739
4740 case 0xFE: /* FSIN */
4741 case 0xFF: { /* FCOS */
4742 Bool isSIN = modrm == 0xFE;
4743 DIP("%s\n", isSIN ? "fsin" : "fcos");
4744 IRTemp argD = newTemp(Ity_F64);
4745 assign(argD, get_ST(0));
4746 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
4747 IRTemp resD = newTemp(Ity_F64);
4748 assign(resD,
4749 IRExpr_ITE(
4750 mkexpr(argOK),
4751 binop(isSIN ? Iop_SinF64 : Iop_CosF64,
4752 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4753 mkexpr(argD)),
4754 mkexpr(argD))
4755 );
4756 put_ST_UNCHECKED(0, mkexpr(resD));
4757 set_C2( binop(Iop_Xor32,
4758 unop(Iop_1Uto32, mkexpr(argOK)),
4759 mkU32(1)) );
4760 break;
4761 }
4762
4763 default:
4764 goto decode_fail;
4765 }
4766 }
4767 }
4768
4769 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
4770 else
4771 if (first_opcode == 0xDA) {
4772
4773 if (modrm < 0xC0) {
4774
4775 /* bits 5,4,3 are an opcode extension, and the modRM also
4776 specifies an address. */
4777 IROp fop;
4778 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4779 delta += len;
4780 switch (gregOfRM(modrm)) {
4781
4782 case 0: /* FIADD m32int */ /* ST(0) += m32int */
4783 DIP("fiaddl %s\n", dis_buf);
4784 fop = Iop_AddF64;
4785 goto do_fop_m32;
4786
4787 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
4788 DIP("fimull %s\n", dis_buf);
4789 fop = Iop_MulF64;
4790 goto do_fop_m32;
4791
4792 case 2: /* FICOM m32int */
4793 DIP("ficoml %s\n", dis_buf);
4794 /* This forces C1 to zero, which isn't right. */
4795 put_C3210(
4796 binop( Iop_And32,
4797 binop(Iop_Shl32,
4798 binop(Iop_CmpF64,
4799 get_ST(0),
4800 unop(Iop_I32StoF64,
4801 loadLE(Ity_I32,mkexpr(addr)))),
4802 mkU8(8)),
4803 mkU32(0x4500)
4804 ));
4805 break;
4806
4807 case 3: /* FICOMP m32int */
4808 DIP("ficompl %s\n", dis_buf);
4809 /* This forces C1 to zero, which isn't right. */
4810 put_C3210(
4811 binop( Iop_And32,
4812 binop(Iop_Shl32,
4813 binop(Iop_CmpF64,
4814 get_ST(0),
4815 unop(Iop_I32StoF64,
4816 loadLE(Ity_I32,mkexpr(addr)))),
4817 mkU8(8)),
4818 mkU32(0x4500)
4819 ));
4820 fp_pop();
4821 break;
4822
4823 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
4824 DIP("fisubl %s\n", dis_buf);
4825 fop = Iop_SubF64;
4826 goto do_fop_m32;
4827
4828 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
4829 DIP("fisubrl %s\n", dis_buf);
4830 fop = Iop_SubF64;
4831 goto do_foprev_m32;
4832
4833 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
4834 DIP("fidivl %s\n", dis_buf);
4835 fop = Iop_DivF64;
4836 goto do_fop_m32;
4837
4838 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
4839 DIP("fidivrl %s\n", dis_buf);
4840 fop = Iop_DivF64;
4841 goto do_foprev_m32;
4842
4843 do_fop_m32:
4844 put_ST_UNCHECKED(0,
4845 triop(fop,
4846 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4847 get_ST(0),
4848 unop(Iop_I32StoF64,
4849 loadLE(Ity_I32, mkexpr(addr)))));
4850 break;
4851
4852 do_foprev_m32:
4853 put_ST_UNCHECKED(0,
4854 triop(fop,
4855 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4856 unop(Iop_I32StoF64,
4857 loadLE(Ity_I32, mkexpr(addr))),
4858 get_ST(0)));
4859 break;
4860
4861 default:
4862 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
4863 vex_printf("first_opcode == 0xDA\n");
4864 goto decode_fail;
4865 }
4866
4867 } else {
4868
4869 delta++;
4870 switch (modrm) {
4871
4872 case 0xc0:
4873 case 0xc1:
4874 case 0xc2:
4875 case 0xc3:
4876 case 0xc4:
4877 case 0xc5:
4878 case 0xc6:
4879 case 0xc7: /* FCMOVB ST(i), ST(0) */
4880 r_src = (UInt)modrm - 0xC0;
4881 DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
4882 put_ST_UNCHECKED(0,
4883 IRExpr_ITE(
4884 mk_x86g_calculate_condition(X86CondB),
4885 get_ST(r_src), get_ST(0)) );
4886 break;
4887
4888 case 0xc8:
4889 case 0xc9:
4890 case 0xca:
4891 case 0xcb:
4892 case 0xcc:
4893 case 0xcd:
4894 case 0xce:
4895 case 0xcf: /* FCMOVE(Z) ST(i), ST(0) */
4896 r_src = (UInt)modrm - 0xC8;
4897 DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
4898 put_ST_UNCHECKED(0,
4899 IRExpr_ITE(
4900 mk_x86g_calculate_condition(X86CondZ),
4901 get_ST(r_src), get_ST(0)) );
4902 break;
4903
4904 case 0xd0:
4905 case 0xd1:
4906 case 0xd2:
4907 case 0xd3:
4908 case 0xd4:
4909 case 0xd5:
4910 case 0xd6:
4911 case 0xd7: /* FCMOVBE ST(i), ST(0) */
4912 r_src = (UInt)modrm - 0xD0;
4913 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
4914 put_ST_UNCHECKED(0,
4915 IRExpr_ITE(
4916 mk_x86g_calculate_condition(X86CondBE),
4917 get_ST(r_src), get_ST(0)) );
4918 break;
4919
4920 case 0xd8:
4921 case 0xd9:
4922 case 0xda:
4923 case 0xdb:
4924 case 0xdc:
4925 case 0xdd:
4926 case 0xde:
4927 case 0xdf: /* FCMOVU ST(i), ST(0) */
4928 r_src = (UInt)modrm - 0xD8;
4929 DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
4930 put_ST_UNCHECKED(0,
4931 IRExpr_ITE(
4932 mk_x86g_calculate_condition(X86CondP),
4933 get_ST(r_src), get_ST(0)) );
4934 break;
4935
4936 case 0xE9: /* FUCOMPP %st(0),%st(1) */
4937 DIP("fucompp %%st(0),%%st(1)\n");
4938 /* This forces C1 to zero, which isn't right. */
4939 put_C3210(
4940 binop( Iop_And32,
4941 binop(Iop_Shl32,
4942 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
4943 mkU8(8)),
4944 mkU32(0x4500)
4945 ));
4946 fp_pop();
4947 fp_pop();
4948 break;
4949
4950 default:
4951 goto decode_fail;
4952 }
4953
4954 }
4955 }
4956
4957 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
4958 else
4959 if (first_opcode == 0xDB) {
4960 if (modrm < 0xC0) {
4961
4962 /* bits 5,4,3 are an opcode extension, and the modRM also
4963 specifies an address. */
4964 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4965 delta += len;
4966
4967 switch (gregOfRM(modrm)) {
4968
4969 case 0: /* FILD m32int */
4970 DIP("fildl %s\n", dis_buf);
4971 fp_push();
4972 put_ST(0, unop(Iop_I32StoF64,
4973 loadLE(Ity_I32, mkexpr(addr))));
4974 break;
4975
4976 case 1: /* FISTTPL m32 (SSE3) */
4977 DIP("fisttpl %s\n", dis_buf);
4978 storeLE( mkexpr(addr),
4979 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
4980 fp_pop();
4981 break;
4982
4983 case 2: /* FIST m32 */
4984 DIP("fistl %s\n", dis_buf);
4985 storeLE( mkexpr(addr),
4986 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
4987 break;
4988
4989 case 3: /* FISTP m32 */
4990 DIP("fistpl %s\n", dis_buf);
4991 storeLE( mkexpr(addr),
4992 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
4993 fp_pop();
4994 break;
4995
4996 case 5: { /* FLD extended-real */
4997 /* Uses dirty helper:
4998 ULong x86g_loadF80le ( UInt )
4999 addr holds the address. First, do a dirty call to
5000 get hold of the data. */
5001 IRTemp val = newTemp(Ity_I64);
5002 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
5003
5004 IRDirty* d = unsafeIRDirty_1_N (
5005 val,
5006 0/*regparms*/,
5007 "x86g_dirtyhelper_loadF80le",
5008 &x86g_dirtyhelper_loadF80le,
5009 args
5010 );
5011 /* declare that we're reading memory */
5012 d->mFx = Ifx_Read;
5013 d->mAddr = mkexpr(addr);
5014 d->mSize = 10;
5015
5016 /* execute the dirty call, dumping the result in val. */
5017 stmt( IRStmt_Dirty(d) );
5018 fp_push();
5019 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
5020
5021 DIP("fldt %s\n", dis_buf);
5022 break;
5023 }
5024
5025 case 7: { /* FSTP extended-real */
5026 /* Uses dirty helper: void x86g_storeF80le ( UInt, ULong ) */
5027 IRExpr** args
5028 = mkIRExprVec_2( mkexpr(addr),
5029 unop(Iop_ReinterpF64asI64, get_ST(0)) );
5030
5031 IRDirty* d = unsafeIRDirty_0_N (
5032 0/*regparms*/,
5033 "x86g_dirtyhelper_storeF80le",
5034 &x86g_dirtyhelper_storeF80le,
5035 args
5036 );
5037 /* declare we're writing memory */
5038 d->mFx = Ifx_Write;
5039 d->mAddr = mkexpr(addr);
5040 d->mSize = 10;
5041
5042 /* execute the dirty call. */
5043 stmt( IRStmt_Dirty(d) );
5044 fp_pop();
5045
5046 DIP("fstpt\n %s", dis_buf);
5047 break;
5048 }
5049
5050 default:
5051 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
5052 vex_printf("first_opcode == 0xDB\n");
5053 goto decode_fail;
5054 }
5055
5056 } else {
5057
5058 delta++;
5059 switch (modrm) {
5060
5061 case 0xc0:
5062 case 0xc1:
5063 case 0xc2:
5064 case 0xc3:
5065 case 0xc4:
5066 case 0xc5:
5067 case 0xc6:
5068 case 0xc7: /* FCMOVNB ST(i), ST(0) */
5069 r_src = (UInt)modrm - 0xC0;
5070 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src);
5071 put_ST_UNCHECKED(0,
5072 IRExpr_ITE(
5073 mk_x86g_calculate_condition(X86CondNB),
5074 get_ST(r_src), get_ST(0)) );
5075 break;
5076
5077 case 0xc8:
5078 case 0xc9:
5079 case 0xca:
5080 case 0xcb:
5081 case 0xcc:
5082 case 0xcd:
5083 case 0xce:
5084 case 0xcf: /* FCMOVNE(NZ) ST(i), ST(0) */
5085 r_src = (UInt)modrm - 0xC8;
5086 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src);
5087 put_ST_UNCHECKED(0,
5088 IRExpr_ITE(
5089 mk_x86g_calculate_condition(X86CondNZ),
5090 get_ST(r_src), get_ST(0)) );
5091 break;
5092
5093 case 0xd0:
5094 case 0xd1:
5095 case 0xd2:
5096 case 0xd3:
5097 case 0xd4:
5098 case 0xd5:
5099 case 0xd6:
5100 case 0xd7: /* FCMOVNBE ST(i), ST(0) */
5101 r_src = (UInt)modrm - 0xD0;
5102 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src);
5103 put_ST_UNCHECKED(0,
5104 IRExpr_ITE(
5105 mk_x86g_calculate_condition(X86CondNBE),
5106 get_ST(r_src), get_ST(0)) );
5107 break;
5108
5109 case 0xd8:
5110 case 0xd9:
5111 case 0xda:
5112 case 0xdb:
5113 case 0xdc:
5114 case 0xdd:
5115 case 0xde:
5116 case 0xdf: /* FCMOVNU ST(i), ST(0) */
5117 r_src = (UInt)modrm - 0xD8;
5118 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src);
5119 put_ST_UNCHECKED(0,
5120 IRExpr_ITE(
5121 mk_x86g_calculate_condition(X86CondNP),
5122 get_ST(r_src), get_ST(0)) );
5123 break;
5124
5125 case 0xE2:
5126 DIP("fnclex\n");
5127 break;
5128
5129 case 0xE3: {
5130 /* Uses dirty helper:
5131 void x86g_do_FINIT ( VexGuestX86State* ) */
5132 IRDirty* d = unsafeIRDirty_0_N (
5133 0/*regparms*/,
5134 "x86g_dirtyhelper_FINIT",
5135 &x86g_dirtyhelper_FINIT,
5136 mkIRExprVec_1(IRExpr_GSPTR())
5137 );
5138
5139 /* declare we're writing guest state */
5140 d->nFxState = 5;
5141 vex_bzero(&d->fxState, sizeof(d->fxState));
5142
5143 d->fxState[0].fx = Ifx_Write;
5144 d->fxState[0].offset = OFFB_FTOP;
5145 d->fxState[0].size = sizeof(UInt);
5146
5147 d->fxState[1].fx = Ifx_Write;
5148 d->fxState[1].offset = OFFB_FPREGS;
5149 d->fxState[1].size = 8 * sizeof(ULong);
5150
5151 d->fxState[2].fx = Ifx_Write;
5152 d->fxState[2].offset = OFFB_FPTAGS;
5153 d->fxState[2].size = 8 * sizeof(UChar);
5154
5155 d->fxState[3].fx = Ifx_Write;
5156 d->fxState[3].offset = OFFB_FPROUND;
5157 d->fxState[3].size = sizeof(UInt);
5158
5159 d->fxState[4].fx = Ifx_Write;
5160 d->fxState[4].offset = OFFB_FC3210;
5161 d->fxState[4].size = sizeof(UInt);
5162
5163 stmt( IRStmt_Dirty(d) );
5164
5165 DIP("fninit\n");
5166 break;
5167 }
5168
5169 case 0xe8:
5170 case 0xe9:
5171 case 0xea:
5172 case 0xeb:
5173 case 0xec:
5174 case 0xed:
5175 case 0xee:
5176 case 0xef: /* FUCOMI %st(0),%st(?) */
5177 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
5178 break;
5179
5180 case 0xf0:
5181 case 0xf1:
5182 case 0xf2:
5183 case 0xf3:
5184 case 0xf4:
5185 case 0xf5:
5186 case 0xf6:
5187 case 0xf7: /* FCOMI %st(0),%st(?) */
5188 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
5189 break;
5190
5191 default:
5192 goto decode_fail;
5193 }
5194 }
5195 }
5196
5197 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
5198 else
5199 if (first_opcode == 0xDC) {
5200 if (modrm < 0xC0) {
5201
5202 /* bits 5,4,3 are an opcode extension, and the modRM also
5203 specifies an address. */
5204 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5205 delta += len;
5206
5207 switch (gregOfRM(modrm)) {
5208
5209 case 0: /* FADD double-real */
5210 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
5211 break;
5212
5213 case 1: /* FMUL double-real */
5214 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
5215 break;
5216
5217 case 2: /* FCOM double-real */
5218 DIP("fcoml %s\n", dis_buf);
5219 /* This forces C1 to zero, which isn't right. */
5220 put_C3210(
5221 binop( Iop_And32,
5222 binop(Iop_Shl32,
5223 binop(Iop_CmpF64,
5224 get_ST(0),
5225 loadLE(Ity_F64,mkexpr(addr))),
5226 mkU8(8)),
5227 mkU32(0x4500)
5228 ));
5229 break;
5230
5231 case 3: /* FCOMP double-real */
5232 DIP("fcompl %s\n", dis_buf);
5233 /* This forces C1 to zero, which isn't right. */
5234 put_C3210(
5235 binop( Iop_And32,
5236 binop(Iop_Shl32,
5237 binop(Iop_CmpF64,
5238 get_ST(0),
5239 loadLE(Ity_F64,mkexpr(addr))),
5240 mkU8(8)),
5241 mkU32(0x4500)
5242 ));
5243 fp_pop();
5244 break;
5245
5246 case 4: /* FSUB double-real */
5247 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
5248 break;
5249
5250 case 5: /* FSUBR double-real */
5251 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
5252 break;
5253
5254 case 6: /* FDIV double-real */
5255 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
5256 break;
5257
5258 case 7: /* FDIVR double-real */
5259 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
5260 break;
5261
5262 default:
5263 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
5264 vex_printf("first_opcode == 0xDC\n");
5265 goto decode_fail;
5266 }
5267
5268 } else {
5269
5270 delta++;
5271 switch (modrm) {
5272
5273 case 0xc0:
5274 case 0xc1:
5275 case 0xc2:
5276 case 0xc3:
5277 case 0xc4:
5278 case 0xc5:
5279 case 0xc6:
5280 case 0xc7: /* FADD %st(0),%st(?) */
5281 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
5282 break;
5283
5284 case 0xc8:
5285 case 0xc9:
5286 case 0xca:
5287 case 0xcb:
5288 case 0xcc:
5289 case 0xcd:
5290 case 0xce:
5291 case 0xcf: /* FMUL %st(0),%st(?) */
5292 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
5293 break;
5294
5295 case 0xe0:
5296 case 0xe1:
5297 case 0xe2:
5298 case 0xe3:
5299 case 0xe4:
5300 case 0xe5:
5301 case 0xe6:
5302 case 0xe7: /* FSUBR %st(0),%st(?) */
5303 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
5304 break;
5305
5306 case 0xe8:
5307 case 0xe9:
5308 case 0xea:
5309 case 0xeb:
5310 case 0xec:
5311 case 0xed:
5312 case 0xee:
5313 case 0xef: /* FSUB %st(0),%st(?) */
5314 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
5315 break;
5316
5317 case 0xf0:
5318 case 0xf1:
5319 case 0xf2:
5320 case 0xf3:
5321 case 0xf4:
5322 case 0xf5:
5323 case 0xf6:
5324 case 0xf7: /* FDIVR %st(0),%st(?) */
5325 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
5326 break;
5327
5328 case 0xf8:
5329 case 0xf9:
5330 case 0xfa:
5331 case 0xfb:
5332 case 0xfc:
5333 case 0xfd:
5334 case 0xfe:
5335 case 0xff: /* FDIV %st(0),%st(?) */
5336 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
5337 break;
5338
5339 default:
5340 goto decode_fail;
5341 }
5342
5343 }
5344 }
5345
5346 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
5347 else
5348 if (first_opcode == 0xDD) {
5349
5350 if (modrm < 0xC0) {
5351
5352 /* bits 5,4,3 are an opcode extension, and the modRM also
5353 specifies an address. */
5354 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5355 delta += len;
5356
5357 switch (gregOfRM(modrm)) {
5358
5359 case 0: /* FLD double-real */
5360 DIP("fldl %s\n", dis_buf);
5361 fp_push();
5362 put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
5363 break;
5364
5365 case 1: /* FISTTPQ m64 (SSE3) */
5366 DIP("fistppll %s\n", dis_buf);
5367 storeLE( mkexpr(addr),
5368 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
5369 fp_pop();
5370 break;
5371
5372 case 2: /* FST double-real */
5373 DIP("fstl %s\n", dis_buf);
5374 storeLE(mkexpr(addr), get_ST(0));
5375 break;
5376
5377 case 3: /* FSTP double-real */
5378 DIP("fstpl %s\n", dis_buf);
5379 storeLE(mkexpr(addr), get_ST(0));
5380 fp_pop();
5381 break;
5382
5383 case 4: { /* FRSTOR m108 */
5384 /* Uses dirty helper:
5385 VexEmNote x86g_do_FRSTOR ( VexGuestX86State*, Addr32 ) */
5386 IRTemp ew = newTemp(Ity_I32);
5387 IRDirty* d = unsafeIRDirty_0_N (
5388 0/*regparms*/,
5389 "x86g_dirtyhelper_FRSTOR",
5390 &x86g_dirtyhelper_FRSTOR,
5391 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
5392 );
5393 d->tmp = ew;
5394 /* declare we're reading memory */
5395 d->mFx = Ifx_Read;
5396 d->mAddr = mkexpr(addr);
5397 d->mSize = 108;
5398
5399 /* declare we're writing guest state */
5400 d->nFxState = 5;
5401 vex_bzero(&d->fxState, sizeof(d->fxState));
5402
5403 d->fxState[0].fx = Ifx_Write;
5404 d->fxState[0].offset = OFFB_FTOP;
5405 d->fxState[0].size = sizeof(UInt);
5406
5407 d->fxState[1].fx = Ifx_Write;
5408 d->fxState[1].offset = OFFB_FPREGS;
5409 d->fxState[1].size = 8 * sizeof(ULong);
5410
5411 d->fxState[2].fx = Ifx_Write;
5412 d->fxState[2].offset = OFFB_FPTAGS;
5413 d->fxState[2].size = 8 * sizeof(UChar);
5414
5415 d->fxState[3].fx = Ifx_Write;
5416 d->fxState[3].offset = OFFB_FPROUND;
5417 d->fxState[3].size = sizeof(UInt);
5418
5419 d->fxState[4].fx = Ifx_Write;
5420 d->fxState[4].offset = OFFB_FC3210;
5421 d->fxState[4].size = sizeof(UInt);
5422
5423 stmt( IRStmt_Dirty(d) );
5424
5425 /* ew contains any emulation warning we may need to
5426 issue. If needed, side-exit to the next insn,
5427 reporting the warning, so that Valgrind's dispatcher
5428 sees the warning. */
5429 put_emwarn( mkexpr(ew) );
5430 stmt(
5431 IRStmt_Exit(
5432 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5433 Ijk_EmWarn,
5434 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
5435 OFFB_EIP
5436 )
5437 );
5438
5439 DIP("frstor %s\n", dis_buf);
5440 break;
5441 }
5442
5443 case 6: { /* FNSAVE m108 */
5444 /* Uses dirty helper:
5445 void x86g_do_FSAVE ( VexGuestX86State*, UInt ) */
5446 IRDirty* d = unsafeIRDirty_0_N (
5447 0/*regparms*/,
5448 "x86g_dirtyhelper_FSAVE",
5449 &x86g_dirtyhelper_FSAVE,
5450 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
5451 );
5452 /* declare we're writing memory */
5453 d->mFx = Ifx_Write;
5454 d->mAddr = mkexpr(addr);
5455 d->mSize = 108;
5456
5457 /* declare we're reading guest state */
5458 d->nFxState = 5;
5459 vex_bzero(&d->fxState, sizeof(d->fxState));
5460
5461 d->fxState[0].fx = Ifx_Read;
5462 d->fxState[0].offset = OFFB_FTOP;
5463 d->fxState[0].size = sizeof(UInt);
5464
5465 d->fxState[1].fx = Ifx_Read;
5466 d->fxState[1].offset = OFFB_FPREGS;
5467 d->fxState[1].size = 8 * sizeof(ULong);
5468
5469 d->fxState[2].fx = Ifx_Read;
5470 d->fxState[2].offset = OFFB_FPTAGS;
5471 d->fxState[2].size = 8 * sizeof(UChar);
5472
5473 d->fxState[3].fx = Ifx_Read;
5474 d->fxState[3].offset = OFFB_FPROUND;
5475 d->fxState[3].size = sizeof(UInt);
5476
5477 d->fxState[4].fx = Ifx_Read;
5478 d->fxState[4].offset = OFFB_FC3210;
5479 d->fxState[4].size = sizeof(UInt);
5480
5481 stmt( IRStmt_Dirty(d) );
5482
5483 DIP("fnsave %s\n", dis_buf);
5484 break;
5485 }
5486
5487 case 7: { /* FNSTSW m16 */
5488 IRExpr* sw = get_FPU_sw();
5489 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
5490 storeLE( mkexpr(addr), sw );
5491 DIP("fnstsw %s\n", dis_buf);
5492 break;
5493 }
5494
5495 default:
5496 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
5497 vex_printf("first_opcode == 0xDD\n");
5498 goto decode_fail;
5499 }
5500 } else {
5501 delta++;
5502 switch (modrm) {
5503
5504 case 0xc0:
5505 case 0xc1:
5506 case 0xc2:
5507 case 0xc3:
5508 case 0xc4:
5509 case 0xc5:
5510 case 0xc6:
5511 case 0xc7: /* FFREE %st(?) */
5512 r_dst = (UInt)modrm - 0xC0;
5513 DIP("ffree %%st(%u)\n", r_dst);
5514 put_ST_TAG ( r_dst, mkU8(0) );
5515 break;
5516
5517 case 0xd0:
5518 case 0xd1:
5519 case 0xd2:
5520 case 0xd3:
5521 case 0xd4:
5522 case 0xd5:
5523 case 0xd6:
5524 case 0xd7: /* FST %st(0),%st(?) */
5525 r_dst = (UInt)modrm - 0xD0;
5526 DIP("fst %%st(0),%%st(%u)\n", r_dst);
5527 /* P4 manual says: "If the destination operand is a
5528 non-empty register, the invalid-operation exception
5529 is not generated. Hence put_ST_UNCHECKED. */
5530 put_ST_UNCHECKED(r_dst, get_ST(0));
5531 break;
5532
5533 case 0xd8:
5534 case 0xd9:
5535 case 0xda:
5536 case 0xdb:
5537 case 0xdc:
5538 case 0xdd:
5539 case 0xde:
5540 case 0xdf: /* FSTP %st(0),%st(?) */
5541 r_dst = (UInt)modrm - 0xD8;
5542 DIP("fstp %%st(0),%%st(%u)\n", r_dst);
5543 /* P4 manual says: "If the destination operand is a
5544 non-empty register, the invalid-operation exception
5545 is not generated. Hence put_ST_UNCHECKED. */
5546 put_ST_UNCHECKED(r_dst, get_ST(0));
5547 fp_pop();
5548 break;
5549
5550 case 0xe0:
5551 case 0xe1:
5552 case 0xe2:
5553 case 0xe3:
5554 case 0xe4:
5555 case 0xe5:
5556 case 0xe6:
5557 case 0xe7: /* FUCOM %st(0),%st(?) */
5558 r_dst = (UInt)modrm - 0xE0;
5559 DIP("fucom %%st(0),%%st(%u)\n", r_dst);
5560 /* This forces C1 to zero, which isn't right. */
5561 put_C3210(
5562 binop( Iop_And32,
5563 binop(Iop_Shl32,
5564 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5565 mkU8(8)),
5566 mkU32(0x4500)
5567 ));
5568 break;
5569
5570 case 0xe8:
5571 case 0xe9:
5572 case 0xea:
5573 case 0xeb:
5574 case 0xec:
5575 case 0xed:
5576 case 0xee:
5577 case 0xef: /* FUCOMP %st(0),%st(?) */
5578 r_dst = (UInt)modrm - 0xE8;
5579 DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
5580 /* This forces C1 to zero, which isn't right. */
5581 put_C3210(
5582 binop( Iop_And32,
5583 binop(Iop_Shl32,
5584 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5585 mkU8(8)),
5586 mkU32(0x4500)
5587 ));
5588 fp_pop();
5589 break;
5590
5591 default:
5592 goto decode_fail;
5593 }
5594 }
5595 }
5596
5597 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
5598 else
5599 if (first_opcode == 0xDE) {
5600
5601 if (modrm < 0xC0) {
5602
5603 /* bits 5,4,3 are an opcode extension, and the modRM also
5604 specifies an address. */
5605 IROp fop;
5606 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5607 delta += len;
5608
5609 switch (gregOfRM(modrm)) {
5610
5611 case 0: /* FIADD m16int */ /* ST(0) += m16int */
5612 DIP("fiaddw %s\n", dis_buf);
5613 fop = Iop_AddF64;
5614 goto do_fop_m16;
5615
5616 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
5617 DIP("fimulw %s\n", dis_buf);
5618 fop = Iop_MulF64;
5619 goto do_fop_m16;
5620
5621 case 2: /* FICOM m16int */
5622 DIP("ficomw %s\n", dis_buf);
5623 /* This forces C1 to zero, which isn't right. */
5624 put_C3210(
5625 binop( Iop_And32,
5626 binop(Iop_Shl32,
5627 binop(Iop_CmpF64,
5628 get_ST(0),
5629 unop(Iop_I32StoF64,
5630 unop(Iop_16Sto32,
5631 loadLE(Ity_I16,mkexpr(addr))))),
5632 mkU8(8)),
5633 mkU32(0x4500)
5634 ));
5635 break;
5636
5637 case 3: /* FICOMP m16int */
5638 DIP("ficompw %s\n", dis_buf);
5639 /* This forces C1 to zero, which isn't right. */
5640 put_C3210(
5641 binop( Iop_And32,
5642 binop(Iop_Shl32,
5643 binop(Iop_CmpF64,
5644 get_ST(0),
5645 unop(Iop_I32StoF64,
5646 unop(Iop_16Sto32,
5647 loadLE(Ity_I16,mkexpr(addr))))),
5648 mkU8(8)),
5649 mkU32(0x4500)
5650 ));
5651 fp_pop();
5652 break;
5653
5654 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
5655 DIP("fisubw %s\n", dis_buf);
5656 fop = Iop_SubF64;
5657 goto do_fop_m16;
5658
5659 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
5660 DIP("fisubrw %s\n", dis_buf);
5661 fop = Iop_SubF64;
5662 goto do_foprev_m16;
5663
5664 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
5665 DIP("fisubw %s\n", dis_buf);
5666 fop = Iop_DivF64;
5667 goto do_fop_m16;
5668
5669 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
5670 DIP("fidivrw %s\n", dis_buf);
5671 fop = Iop_DivF64;
5672 goto do_foprev_m16;
5673
5674 do_fop_m16:
5675 put_ST_UNCHECKED(0,
5676 triop(fop,
5677 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5678 get_ST(0),
5679 unop(Iop_I32StoF64,
5680 unop(Iop_16Sto32,
5681 loadLE(Ity_I16, mkexpr(addr))))));
5682 break;
5683
5684 do_foprev_m16:
5685 put_ST_UNCHECKED(0,
5686 triop(fop,
5687 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5688 unop(Iop_I32StoF64,
5689 unop(Iop_16Sto32,
5690 loadLE(Ity_I16, mkexpr(addr)))),
5691 get_ST(0)));
5692 break;
5693
5694 default:
5695 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
5696 vex_printf("first_opcode == 0xDE\n");
5697 goto decode_fail;
5698 }
5699
5700 } else {
5701
5702 delta++;
5703 switch (modrm) {
5704
5705 case 0xc0:
5706 case 0xc1:
5707 case 0xc2:
5708 case 0xc3:
5709 case 0xc4:
5710 case 0xc5:
5711 case 0xc6:
5712 case 0xc7: /* FADDP %st(0),%st(?) */
5713 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
5714 break;
5715
5716 case 0xc8:
5717 case 0xc9:
5718 case 0xca:
5719 case 0xcb:
5720 case 0xcc:
5721 case 0xcd:
5722 case 0xce:
5723 case 0xcf: /* FMULP %st(0),%st(?) */
5724 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
5725 break;
5726
5727 case 0xD9: /* FCOMPP %st(0),%st(1) */
5728 DIP("fuompp %%st(0),%%st(1)\n");
5729 /* This forces C1 to zero, which isn't right. */
5730 put_C3210(
5731 binop( Iop_And32,
5732 binop(Iop_Shl32,
5733 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
5734 mkU8(8)),
5735 mkU32(0x4500)
5736 ));
5737 fp_pop();
5738 fp_pop();
5739 break;
5740
5741 case 0xe0:
5742 case 0xe1:
5743 case 0xe2:
5744 case 0xe3:
5745 case 0xe4:
5746 case 0xe5:
5747 case 0xe6:
5748 case 0xe7: /* FSUBRP %st(0),%st(?) */
5749 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
5750 break;
5751
5752 case 0xe8:
5753 case 0xe9:
5754 case 0xea:
5755 case 0xeb:
5756 case 0xec:
5757 case 0xed:
5758 case 0xee:
5759 case 0xef: /* FSUBP %st(0),%st(?) */
5760 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
5761 break;
5762
5763 case 0xf0:
5764 case 0xf1:
5765 case 0xf2:
5766 case 0xf3:
5767 case 0xf4:
5768 case 0xf5:
5769 case 0xf6:
5770 case 0xf7: /* FDIVRP %st(0),%st(?) */
5771 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
5772 break;
5773
5774 case 0xf8:
5775 case 0xf9:
5776 case 0xfa:
5777 case 0xfb:
5778 case 0xfc:
5779 case 0xfd:
5780 case 0xfe:
5781 case 0xff: /* FDIVP %st(0),%st(?) */
5782 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
5783 break;
5784
5785 default:
5786 goto decode_fail;
5787 }
5788
5789 }
5790 }
5791
5792 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
5793 else
5794 if (first_opcode == 0xDF) {
5795
5796 if (modrm < 0xC0) {
5797
5798 /* bits 5,4,3 are an opcode extension, and the modRM also
5799 specifies an address. */
5800 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5801 delta += len;
5802
5803 switch (gregOfRM(modrm)) {
5804
5805 case 0: /* FILD m16int */
5806 DIP("fildw %s\n", dis_buf);
5807 fp_push();
5808 put_ST(0, unop(Iop_I32StoF64,
5809 unop(Iop_16Sto32,
5810 loadLE(Ity_I16, mkexpr(addr)))));
5811 break;
5812
5813 case 1: /* FISTTPS m16 (SSE3) */
5814 DIP("fisttps %s\n", dis_buf);
5815 storeLE( mkexpr(addr),
5816 binop(Iop_F64toI16S, mkU32(Irrm_ZERO), get_ST(0)) );
5817 fp_pop();
5818 break;
5819
5820 case 2: /* FIST m16 */
5821 DIP("fistp %s\n", dis_buf);
5822 storeLE( mkexpr(addr),
5823 binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );
5824 break;
5825
5826 case 3: /* FISTP m16 */
5827 DIP("fistps %s\n", dis_buf);
5828 storeLE( mkexpr(addr),
5829 binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );
5830 fp_pop();
5831 break;
5832
5833 case 5: /* FILD m64 */
5834 DIP("fildll %s\n", dis_buf);
5835 fp_push();
5836 put_ST(0, binop(Iop_I64StoF64,
5837 get_roundingmode(),
5838 loadLE(Ity_I64, mkexpr(addr))));
5839 break;
5840
5841 case 7: /* FISTP m64 */
5842 DIP("fistpll %s\n", dis_buf);
5843 storeLE( mkexpr(addr),
5844 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
5845 fp_pop();
5846 break;
5847
5848 default:
5849 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
5850 vex_printf("first_opcode == 0xDF\n");
5851 goto decode_fail;
5852 }
5853
5854 } else {
5855
5856 delta++;
5857 switch (modrm) {
5858
5859 case 0xC0: /* FFREEP %st(0) */
5860 DIP("ffreep %%st(%d)\n", 0);
5861 put_ST_TAG ( 0, mkU8(0) );
5862 fp_pop();
5863 break;
5864
5865 case 0xE0: /* FNSTSW %ax */
5866 DIP("fnstsw %%ax\n");
5867 /* Get the FPU status word value and dump it in %AX. */
5868 if (0) {
5869 /* The obvious thing to do is simply dump the 16-bit
5870 status word value in %AX. However, due to a
5871 limitation in Memcheck's origin tracking
5872 machinery, this causes Memcheck not to track the
5873 origin of any undefinedness into %AH (only into
5874 %AL/%AX/%EAX), which means origins are lost in
5875 the sequence "fnstsw %ax; test $M,%ah; jcond .." */
5876 putIReg(2, R_EAX, get_FPU_sw());
5877 } else {
5878 /* So a somewhat lame kludge is to make it very
5879 clear to Memcheck that the value is written to
5880 both %AH and %AL. This generates marginally
5881 worse code, but I don't think it matters much. */
5882 IRTemp t16 = newTemp(Ity_I16);
5883 assign(t16, get_FPU_sw());
5884 putIReg( 1, R_AL, unop(Iop_16to8, mkexpr(t16)) );
5885 putIReg( 1, R_AH, unop(Iop_16HIto8, mkexpr(t16)) );
5886 }
5887 break;
5888
5889 case 0xe8:
5890 case 0xe9:
5891 case 0xea:
5892 case 0xeb:
5893 case 0xec:
5894 case 0xed:
5895 case 0xee:
5896 case 0xef: /* FUCOMIP %st(0),%st(?) */
5897 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
5898 break;
5899
5900 case 0xf0:
5901 case 0xf1:
5902 case 0xf2:
5903 case 0xf3:
5904 case 0xf4:
5905 case 0xf5:
5906 case 0xf6:
5907 case 0xf7: /* FCOMIP %st(0),%st(?) */
5908 /* not really right since COMIP != UCOMIP */
5909 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
5910 break;
5911
5912 default:
5913 goto decode_fail;
5914 }
5915 }
5916
5917 }
5918
5919 else
5920 vpanic("dis_FPU(x86): invalid primary opcode");
5921
5922 *decode_ok = True;
5923 return delta;
5924
5925 decode_fail:
5926 *decode_ok = False;
5927 return delta;
5928 }
5929
5930
5931 /*------------------------------------------------------------*/
5932 /*--- ---*/
5933 /*--- MMX INSTRUCTIONS ---*/
5934 /*--- ---*/
5935 /*------------------------------------------------------------*/
5936
5937 /* Effect of MMX insns on x87 FPU state (table 11-2 of
5938 IA32 arch manual, volume 3):
5939
5940 Read from, or write to MMX register (viz, any insn except EMMS):
5941 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
5942 * FP stack pointer set to zero
5943
5944 EMMS:
5945 * All tags set to Invalid (empty) -- FPTAGS[i] := zero
5946 * FP stack pointer set to zero
5947 */
5948
do_MMX_preamble(void)5949 static void do_MMX_preamble ( void )
5950 {
5951 Int i;
5952 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5953 IRExpr* zero = mkU32(0);
5954 IRExpr* tag1 = mkU8(1);
5955 put_ftop(zero);
5956 for (i = 0; i < 8; i++)
5957 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
5958 }
5959
do_EMMS_preamble(void)5960 static void do_EMMS_preamble ( void )
5961 {
5962 Int i;
5963 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5964 IRExpr* zero = mkU32(0);
5965 IRExpr* tag0 = mkU8(0);
5966 put_ftop(zero);
5967 for (i = 0; i < 8; i++)
5968 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
5969 }
5970
5971
getMMXReg(UInt archreg)5972 static IRExpr* getMMXReg ( UInt archreg )
5973 {
5974 vassert(archreg < 8);
5975 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
5976 }
5977
5978
/* Write the Ity_I64 expression 'e' to MMX register 'archreg' (0 .. 7).
   MMX register i lives at guest-state bytes OFFB_FPREGS + 8*i .. +7. */
static void putMMXReg ( UInt archreg, IRExpr* e )
{
   Int offset;
   vassert(archreg < 8);
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   offset = OFFB_FPREGS + 8 * archreg;
   stmt( IRStmt_Put( offset, e ) );
}
5985
5986
/* Helper for non-shift MMX insns.  Note this is incomplete in the
   sense that it does not first call do_MMX_preamble() -- that is the
   responsibility of its caller.

   sorb             segment-override prefix, forwarded to disAMode
   delta            offset of the modRM byte of this insn
   opc              the (second) opcode byte; selects the operation
   name             mnemonic root used when printing the disassembly
   show_granularity if True, append the lane-granularity suffix
                    derived from opc's low 2 bits to 'name'

   Decodes "mmxreg-or-mem (E), mmxreg (G)", computes the result into
   G, and returns the updated delta (offset just past the insn). */

static
UInt dis_MMXop_regmem_to_reg ( UChar sorb,
                               Int delta,
                               UChar opc,
                               const HChar* name,
                               Bool show_granularity )
{
   HChar dis_buf[50];
   UChar modrm = getIByte(delta);
   Bool isReg = epartIsReg(modrm);
   IRExpr* argL = NULL;
   IRExpr* argR = NULL;
   IRExpr* argG = NULL;
   IRExpr* argE = NULL;
   IRTemp res = newTemp(Ity_I64);

   Bool invG = False;           /* if True, complement G first (the 0xDF and-not case) */
   IROp op = Iop_INVALID;       /* IR binop implementing this insn, if one exists */
   void* hAddr = NULL;          /* otherwise, address of a clean helper fn ... */
   Bool eLeft = False;          /* if True, E is the left (first) operand */
   const HChar* hName = NULL;   /* ... and its name, for the IR call */

   /* Record that this opcode is implemented by clean helper _name. */
#  define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)

   switch (opc) {
      /* Original MMX ones */
      case 0xFC: op = Iop_Add8x8; break;
      case 0xFD: op = Iop_Add16x4; break;
      case 0xFE: op = Iop_Add32x2; break;

      case 0xEC: op = Iop_QAdd8Sx8; break;
      case 0xED: op = Iop_QAdd16Sx4; break;

      case 0xDC: op = Iop_QAdd8Ux8; break;
      case 0xDD: op = Iop_QAdd16Ux4; break;

      case 0xF8: op = Iop_Sub8x8;  break;
      case 0xF9: op = Iop_Sub16x4; break;
      case 0xFA: op = Iop_Sub32x2; break;

      case 0xE8: op = Iop_QSub8Sx8; break;
      case 0xE9: op = Iop_QSub16Sx4; break;

      case 0xD8: op = Iop_QSub8Ux8; break;
      case 0xD9: op = Iop_QSub16Ux4; break;

      case 0xE5: op = Iop_MulHi16Sx4; break;
      case 0xD5: op = Iop_Mul16x4; break;
      case 0xF5: XXX(x86g_calculate_mmx_pmaddwd); break;

      case 0x74: op = Iop_CmpEQ8x8; break;
      case 0x75: op = Iop_CmpEQ16x4; break;
      case 0x76: op = Iop_CmpEQ32x2; break;

      case 0x64: op = Iop_CmpGT8Sx8; break;
      case 0x65: op = Iop_CmpGT16Sx4; break;
      case 0x66: op = Iop_CmpGT32Sx2; break;

      /* Pack/interleave ops take E as the left operand. */
      case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
      case 0x63: op = Iop_QNarrowBin16Sto8Sx8;  eLeft = True; break;
      case 0x67: op = Iop_QNarrowBin16Sto8Ux8;  eLeft = True; break;

      case 0x68: op = Iop_InterleaveHI8x8;  eLeft = True; break;
      case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
      case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;

      case 0x60: op = Iop_InterleaveLO8x8;  eLeft = True; break;
      case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
      case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;

      case 0xDB: op = Iop_And64; break;
      case 0xDF: op = Iop_And64; invG = True; break;
      case 0xEB: op = Iop_Or64; break;
      case 0xEF: /* Possibly do better here if argL and argR are the
                    same reg */
                 op = Iop_Xor64; break;

      /* Introduced in SSE1 */
      case 0xE0: op = Iop_Avg8Ux8;    break;
      case 0xE3: op = Iop_Avg16Ux4;   break;
      case 0xEE: op = Iop_Max16Sx4;   break;
      case 0xDE: op = Iop_Max8Ux8;    break;
      case 0xEA: op = Iop_Min16Sx4;   break;
      case 0xDA: op = Iop_Min8Ux8;    break;
      case 0xE4: op = Iop_MulHi16Ux4; break;
      case 0xF6: XXX(x86g_calculate_mmx_psadbw); break;

      /* Introduced in SSE2 */
      case 0xD4: op = Iop_Add64; break;
      case 0xFB: op = Iop_Sub64; break;

      default:
         vex_printf("\n0x%x\n", opc);
         vpanic("dis_MMXop_regmem_to_reg");
   }

#  undef XXX

   /* G is always both source and destination; complement it first
      for the and-not (invG) case. */
   argG = getMMXReg(gregOfRM(modrm));
   if (invG)
      argG = unop(Iop_Not64, argG);

   /* E is either a register or a 64-bit little-endian load. */
   if (isReg) {
      delta++;
      argE = getMMXReg(eregOfRM(modrm));
   } else {
      Int len;
      IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
      delta += len;
      argE = loadLE(Ity_I64, mkexpr(addr));
   }

   /* Assign operand order as selected by eLeft above. */
   if (eLeft) {
      argL = argE;
      argR = argG;
   } else {
      argL = argG;
      argR = argE;
   }

   if (op != Iop_INVALID) {
      /* Expressible directly as an IR binop. */
      vassert(hName == NULL);
      vassert(hAddr == NULL);
      assign(res, binop(op, argL, argR));
   } else {
      /* No direct IR op: emit a call to the clean helper chosen in
         the switch above. */
      vassert(hName != NULL);
      vassert(hAddr != NULL);
      assign( res,
              mkIRExprCCall(
                 Ity_I64,
                 0/*regparms*/, hName, hAddr,
                 mkIRExprVec_2( argL, argR )
              )
            );
   }

   putMMXReg( gregOfRM(modrm), mkexpr(res) );

   DIP("%s%s %s, %s\n",
       name, show_granularity ? nameMMXGran(opc & 3) : "",
       ( isReg ? nameMMXReg(eregOfRM(modrm)) : dis_buf ),
       nameMMXReg(gregOfRM(modrm)) );

   return delta;
}
6136
6137
6138 /* Vector by scalar shift of G by the amount specified at the bottom
6139 of E. This is a straight copy of dis_SSE_shiftG_byE. */
6140
dis_MMX_shiftG_byE(UChar sorb,Int delta,const HChar * opname,IROp op)6141 static UInt dis_MMX_shiftG_byE ( UChar sorb, Int delta,
6142 const HChar* opname, IROp op )
6143 {
6144 HChar dis_buf[50];
6145 Int alen, size;
6146 IRTemp addr;
6147 Bool shl, shr, sar;
6148 UChar rm = getIByte(delta);
6149 IRTemp g0 = newTemp(Ity_I64);
6150 IRTemp g1 = newTemp(Ity_I64);
6151 IRTemp amt = newTemp(Ity_I32);
6152 IRTemp amt8 = newTemp(Ity_I8);
6153
6154 if (epartIsReg(rm)) {
6155 assign( amt, unop(Iop_64to32, getMMXReg(eregOfRM(rm))) );
6156 DIP("%s %s,%s\n", opname,
6157 nameMMXReg(eregOfRM(rm)),
6158 nameMMXReg(gregOfRM(rm)) );
6159 delta++;
6160 } else {
6161 addr = disAMode ( &alen, sorb, delta, dis_buf );
6162 assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
6163 DIP("%s %s,%s\n", opname,
6164 dis_buf,
6165 nameMMXReg(gregOfRM(rm)) );
6166 delta += alen;
6167 }
6168 assign( g0, getMMXReg(gregOfRM(rm)) );
6169 assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
6170
6171 shl = shr = sar = False;
6172 size = 0;
6173 switch (op) {
6174 case Iop_ShlN16x4: shl = True; size = 32; break;
6175 case Iop_ShlN32x2: shl = True; size = 32; break;
6176 case Iop_Shl64: shl = True; size = 64; break;
6177 case Iop_ShrN16x4: shr = True; size = 16; break;
6178 case Iop_ShrN32x2: shr = True; size = 32; break;
6179 case Iop_Shr64: shr = True; size = 64; break;
6180 case Iop_SarN16x4: sar = True; size = 16; break;
6181 case Iop_SarN32x2: sar = True; size = 32; break;
6182 default: vassert(0);
6183 }
6184
6185 if (shl || shr) {
6186 assign(
6187 g1,
6188 IRExpr_ITE(
6189 binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
6190 binop(op, mkexpr(g0), mkexpr(amt8)),
6191 mkU64(0)
6192 )
6193 );
6194 } else
6195 if (sar) {
6196 assign(
6197 g1,
6198 IRExpr_ITE(
6199 binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
6200 binop(op, mkexpr(g0), mkexpr(amt8)),
6201 binop(op, mkexpr(g0), mkU8(size-1))
6202 )
6203 );
6204 } else {
6205 /*NOTREACHED*/
6206 vassert(0);
6207 }
6208
6209 putMMXReg( gregOfRM(rm), mkexpr(g1) );
6210 return delta;
6211 }
6212
6213
6214 /* Vector by scalar shift of E by an immediate byte. This is a
6215 straight copy of dis_SSE_shiftE_imm. */
6216
6217 static
dis_MMX_shiftE_imm(Int delta,const HChar * opname,IROp op)6218 UInt dis_MMX_shiftE_imm ( Int delta, const HChar* opname, IROp op )
6219 {
6220 Bool shl, shr, sar;
6221 UChar rm = getIByte(delta);
6222 IRTemp e0 = newTemp(Ity_I64);
6223 IRTemp e1 = newTemp(Ity_I64);
6224 UChar amt, size;
6225 vassert(epartIsReg(rm));
6226 vassert(gregOfRM(rm) == 2
6227 || gregOfRM(rm) == 4 || gregOfRM(rm) == 6);
6228 amt = getIByte(delta+1);
6229 delta += 2;
6230 DIP("%s $%d,%s\n", opname,
6231 (Int)amt,
6232 nameMMXReg(eregOfRM(rm)) );
6233
6234 assign( e0, getMMXReg(eregOfRM(rm)) );
6235
6236 shl = shr = sar = False;
6237 size = 0;
6238 switch (op) {
6239 case Iop_ShlN16x4: shl = True; size = 16; break;
6240 case Iop_ShlN32x2: shl = True; size = 32; break;
6241 case Iop_Shl64: shl = True; size = 64; break;
6242 case Iop_SarN16x4: sar = True; size = 16; break;
6243 case Iop_SarN32x2: sar = True; size = 32; break;
6244 case Iop_ShrN16x4: shr = True; size = 16; break;
6245 case Iop_ShrN32x2: shr = True; size = 32; break;
6246 case Iop_Shr64: shr = True; size = 64; break;
6247 default: vassert(0);
6248 }
6249
6250 if (shl || shr) {
6251 assign( e1, amt >= size
6252 ? mkU64(0)
6253 : binop(op, mkexpr(e0), mkU8(amt))
6254 );
6255 } else
6256 if (sar) {
6257 assign( e1, amt >= size
6258 ? binop(op, mkexpr(e0), mkU8(size-1))
6259 : binop(op, mkexpr(e0), mkU8(amt))
6260 );
6261 } else {
6262 /*NOTREACHED*/
6263 vassert(0);
6264 }
6265
6266 putMMXReg( eregOfRM(rm), mkexpr(e1) );
6267 return delta;
6268 }
6269
6270
6271 /* Completely handle all MMX instructions except emms. */
6272
6273 static
dis_MMX(Bool * decode_ok,UChar sorb,Int sz,Int delta)6274 UInt dis_MMX ( Bool* decode_ok, UChar sorb, Int sz, Int delta )
6275 {
6276 Int len;
6277 UChar modrm;
6278 HChar dis_buf[50];
6279 UChar opc = getIByte(delta);
6280 delta++;
6281
6282 /* dis_MMX handles all insns except emms. */
6283 do_MMX_preamble();
6284
6285 switch (opc) {
6286
6287 case 0x6E:
6288 /* MOVD (src)ireg-or-mem (E), (dst)mmxreg (G)*/
6289 if (sz != 4)
6290 goto mmx_decode_failure;
6291 modrm = getIByte(delta);
6292 if (epartIsReg(modrm)) {
6293 delta++;
6294 putMMXReg(
6295 gregOfRM(modrm),
6296 binop( Iop_32HLto64,
6297 mkU32(0),
6298 getIReg(4, eregOfRM(modrm)) ) );
6299 DIP("movd %s, %s\n",
6300 nameIReg(4,eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm)));
6301 } else {
6302 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
6303 delta += len;
6304 putMMXReg(
6305 gregOfRM(modrm),
6306 binop( Iop_32HLto64,
6307 mkU32(0),
6308 loadLE(Ity_I32, mkexpr(addr)) ) );
6309 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregOfRM(modrm)));
6310 }
6311 break;
6312
6313 case 0x7E: /* MOVD (src)mmxreg (G), (dst)ireg-or-mem (E) */
6314 if (sz != 4)
6315 goto mmx_decode_failure;
6316 modrm = getIByte(delta);
6317 if (epartIsReg(modrm)) {
6318 delta++;
6319 putIReg( 4, eregOfRM(modrm),
6320 unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) );
6321 DIP("movd %s, %s\n",
6322 nameMMXReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm)));
6323 } else {
6324 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
6325 delta += len;
6326 storeLE( mkexpr(addr),
6327 unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) );
6328 DIP("movd %s, %s\n", nameMMXReg(gregOfRM(modrm)), dis_buf);
6329 }
6330 break;
6331
6332 case 0x6F:
6333 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
6334 if (sz != 4)
6335 goto mmx_decode_failure;
6336 modrm = getIByte(delta);
6337 if (epartIsReg(modrm)) {
6338 delta++;
6339 putMMXReg( gregOfRM(modrm), getMMXReg(eregOfRM(modrm)) );
6340 DIP("movq %s, %s\n",
6341 nameMMXReg(eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm)));
6342 } else {
6343 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
6344 delta += len;
6345 putMMXReg( gregOfRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
6346 DIP("movq %s, %s\n",
6347 dis_buf, nameMMXReg(gregOfRM(modrm)));
6348 }
6349 break;
6350
6351 case 0x7F:
6352 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
6353 if (sz != 4)
6354 goto mmx_decode_failure;
6355 modrm = getIByte(delta);
6356 if (epartIsReg(modrm)) {
6357 delta++;
6358 putMMXReg( eregOfRM(modrm), getMMXReg(gregOfRM(modrm)) );
6359 DIP("movq %s, %s\n",
6360 nameMMXReg(gregOfRM(modrm)), nameMMXReg(eregOfRM(modrm)));
6361 } else {
6362 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
6363 delta += len;
6364 storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
6365 DIP("mov(nt)q %s, %s\n",
6366 nameMMXReg(gregOfRM(modrm)), dis_buf);
6367 }
6368 break;
6369
6370 case 0xFC:
6371 case 0xFD:
6372 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
6373 if (sz != 4)
6374 goto mmx_decode_failure;
6375 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padd", True );
6376 break;
6377
6378 case 0xEC:
6379 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
6380 if (sz != 4)
6381 goto mmx_decode_failure;
6382 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padds", True );
6383 break;
6384
6385 case 0xDC:
6386 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
6387 if (sz != 4)
6388 goto mmx_decode_failure;
6389 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "paddus", True );
6390 break;
6391
6392 case 0xF8:
6393 case 0xF9:
6394 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
6395 if (sz != 4)
6396 goto mmx_decode_failure;
6397 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psub", True );
6398 break;
6399
6400 case 0xE8:
6401 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
6402 if (sz != 4)
6403 goto mmx_decode_failure;
6404 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubs", True );
6405 break;
6406
6407 case 0xD8:
6408 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
6409 if (sz != 4)
6410 goto mmx_decode_failure;
6411 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubus", True );
6412 break;
6413
6414 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
6415 if (sz != 4)
6416 goto mmx_decode_failure;
6417 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmulhw", False );
6418 break;
6419
6420 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
6421 if (sz != 4)
6422 goto mmx_decode_failure;
6423 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmullw", False );
6424 break;
6425
6426 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
6427 vassert(sz == 4);
6428 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmaddwd", False );
6429 break;
6430
6431 case 0x74:
6432 case 0x75:
6433 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
6434 if (sz != 4)
6435 goto mmx_decode_failure;
6436 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpeq", True );
6437 break;
6438
6439 case 0x64:
6440 case 0x65:
6441 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
6442 if (sz != 4)
6443 goto mmx_decode_failure;
6444 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpgt", True );
6445 break;
6446
6447 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
6448 if (sz != 4)
6449 goto mmx_decode_failure;
6450 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packssdw", False );
6451 break;
6452
6453 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
6454 if (sz != 4)
6455 goto mmx_decode_failure;
6456 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packsswb", False );
6457 break;
6458
6459 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
6460 if (sz != 4)
6461 goto mmx_decode_failure;
6462 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packuswb", False );
6463 break;
6464
6465 case 0x68:
6466 case 0x69:
6467 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
6468 if (sz != 4)
6469 goto mmx_decode_failure;
6470 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckh", True );
6471 break;
6472
6473 case 0x60:
6474 case 0x61:
6475 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
6476 if (sz != 4)
6477 goto mmx_decode_failure;
6478 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckl", True );
6479 break;
6480
6481 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
6482 if (sz != 4)
6483 goto mmx_decode_failure;
6484 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pand", False );
6485 break;
6486
6487 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
6488 if (sz != 4)
6489 goto mmx_decode_failure;
6490 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pandn", False );
6491 break;
6492
6493 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
6494 if (sz != 4)
6495 goto mmx_decode_failure;
6496 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "por", False );
6497 break;
6498
6499 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
6500 if (sz != 4)
6501 goto mmx_decode_failure;
6502 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pxor", False );
6503 break;
6504
6505 # define SHIFT_BY_REG(_name,_op) \
6506 delta = dis_MMX_shiftG_byE(sorb, delta, _name, _op); \
6507 break;
6508
6509 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
6510 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
6511 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
6512 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);
6513
6514 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
6515 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
6516 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
6517 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);
6518
6519 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
6520 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
6521 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);
6522
6523 # undef SHIFT_BY_REG
6524
6525 case 0x71:
6526 case 0x72:
6527 case 0x73: {
6528 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
6529 UChar byte2, subopc;
6530 if (sz != 4)
6531 goto mmx_decode_failure;
6532 byte2 = getIByte(delta); /* amode / sub-opcode */
6533 subopc = toUChar( (byte2 >> 3) & 7 );
6534
6535 # define SHIFT_BY_IMM(_name,_op) \
6536 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
6537 } while (0)
6538
6539 if (subopc == 2 /*SRL*/ && opc == 0x71)
6540 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
6541 else if (subopc == 2 /*SRL*/ && opc == 0x72)
6542 SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
6543 else if (subopc == 2 /*SRL*/ && opc == 0x73)
6544 SHIFT_BY_IMM("psrlq", Iop_Shr64);
6545
6546 else if (subopc == 4 /*SAR*/ && opc == 0x71)
6547 SHIFT_BY_IMM("psraw", Iop_SarN16x4);
6548 else if (subopc == 4 /*SAR*/ && opc == 0x72)
6549 SHIFT_BY_IMM("psrad", Iop_SarN32x2);
6550
6551 else if (subopc == 6 /*SHL*/ && opc == 0x71)
6552 SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
6553 else if (subopc == 6 /*SHL*/ && opc == 0x72)
6554 SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
6555 else if (subopc == 6 /*SHL*/ && opc == 0x73)
6556 SHIFT_BY_IMM("psllq", Iop_Shl64);
6557
6558 else goto mmx_decode_failure;
6559
6560 # undef SHIFT_BY_IMM
6561 break;
6562 }
6563
6564 case 0xF7: {
6565 IRTemp addr = newTemp(Ity_I32);
6566 IRTemp regD = newTemp(Ity_I64);
6567 IRTemp regM = newTemp(Ity_I64);
6568 IRTemp mask = newTemp(Ity_I64);
6569 IRTemp olddata = newTemp(Ity_I64);
6570 IRTemp newdata = newTemp(Ity_I64);
6571
6572 modrm = getIByte(delta);
6573 if (sz != 4 || (!epartIsReg(modrm)))
6574 goto mmx_decode_failure;
6575 delta++;
6576
6577 assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) ));
6578 assign( regM, getMMXReg( eregOfRM(modrm) ));
6579 assign( regD, getMMXReg( gregOfRM(modrm) ));
6580 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
6581 assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
6582 assign( newdata,
6583 binop(Iop_Or64,
6584 binop(Iop_And64,
6585 mkexpr(regD),
6586 mkexpr(mask) ),
6587 binop(Iop_And64,
6588 mkexpr(olddata),
6589 unop(Iop_Not64, mkexpr(mask)))) );
6590 storeLE( mkexpr(addr), mkexpr(newdata) );
6591 DIP("maskmovq %s,%s\n", nameMMXReg( eregOfRM(modrm) ),
6592 nameMMXReg( gregOfRM(modrm) ) );
6593 break;
6594 }
6595
6596 /* --- MMX decode failure --- */
6597 default:
6598 mmx_decode_failure:
6599 *decode_ok = False;
6600 return delta; /* ignored */
6601
6602 }
6603
6604 *decode_ok = True;
6605 return delta;
6606 }
6607
6608
6609 /*------------------------------------------------------------*/
6610 /*--- More misc arithmetic and other obscure insns. ---*/
6611 /*------------------------------------------------------------*/
6612
6613 /* Double length left and right shifts. Apparently only required in
6614 v-size (no b- variant). */
static
UInt dis_SHLRD_Gv_Ev ( UChar sorb,
                       Int delta, UChar modrm,
                       Int sz,
                       IRExpr* shift_amt,
                       Bool amt_is_literal,
                       const HChar* shift_amt_txt,
                       Bool left_shift )
{
   /* shift_amt :: Ity_I8 is the amount to shift.  shift_amt_txt is used
      for printing it.   And eip on entry points at the modrm byte. */
   Int len;
   HChar dis_buf[50];

   IRType ty       = szToITy(sz);
   IRTemp gsrc     = newTemp(ty);
   IRTemp esrc     = newTemp(ty);
   IRTemp addr     = IRTemp_INVALID;
   IRTemp tmpSH    = newTemp(Ity_I8);
   IRTemp tmpL     = IRTemp_INVALID;
   IRTemp tmpRes   = IRTemp_INVALID;
   IRTemp tmpSubSh = IRTemp_INVALID;
   IROp   mkpair;
   IROp   getres;
   IROp   shift;
   IRExpr* mask    = NULL;

   /* SHLD/SHRD exist only in word and doubleword forms; there is no
      byte variant. */
   vassert(sz == 2 || sz == 4);

   /* The E-part is the destination; this is shifted.  The G-part
      supplies bits to be shifted into the E-part, but is not
      changed.

      If shifting left, form a double-length word with E at the top
      and G at the bottom, and shift this left.  The result is then in
      the high part.

      If shifting right, form a double-length word with G at the top
      and E at the bottom, and shift this right.  The result is then
      at the bottom.  */

   /* Fetch the operands. */

   assign( gsrc, getIReg(sz, gregOfRM(modrm)) );

   if (epartIsReg(modrm)) {
      delta++;
      assign( esrc, getIReg(sz, eregOfRM(modrm)) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          shift_amt_txt,
          nameIReg(sz, gregOfRM(modrm)), nameIReg(sz, eregOfRM(modrm)));
   } else {
      addr = disAMode ( &len, sorb, delta, dis_buf );
      delta += len;
      assign( esrc, loadLE(ty, mkexpr(addr)) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          shift_amt_txt,
          nameIReg(sz, gregOfRM(modrm)), dis_buf);
   }

   /* Round up the relevant primops.  The shift is done on a value of
      double the operand width, so select pairing/unpairing and shift
      ops of the corresponding sizes. */

   if (sz == 4) {
      tmpL     = newTemp(Ity_I64);
      tmpRes   = newTemp(Ity_I32);
      tmpSubSh = newTemp(Ity_I32);
      mkpair   = Iop_32HLto64;
      getres   = left_shift ? Iop_64HIto32 : Iop_64to32;
      shift    = left_shift ? Iop_Shl64 : Iop_Shr64;
      mask     = mkU8(31);
   } else {
      /* sz == 2 */
      tmpL     = newTemp(Ity_I32);
      tmpRes   = newTemp(Ity_I16);
      tmpSubSh = newTemp(Ity_I16);
      mkpair   = Iop_16HLto32;
      getres   = left_shift ? Iop_32HIto16 : Iop_32to16;
      shift    = left_shift ? Iop_Shl32 : Iop_Shr32;
      mask     = mkU8(15);
   }

   /* Do the shift, calculate the subshift value, and set
      the flag thunk. */

   /* Mask the shift amount to the operand width, as the hardware does. */
   assign( tmpSH, binop(Iop_And8, shift_amt, mask) );

   if (left_shift)
      assign( tmpL, binop(mkpair, mkexpr(esrc), mkexpr(gsrc)) );
   else
      assign( tmpL, binop(mkpair, mkexpr(gsrc), mkexpr(esrc)) );

   assign( tmpRes, unop(getres, binop(shift, mkexpr(tmpL), mkexpr(tmpSH)) ) );
   /* tmpSubSh is the result shifted by one less place; the thunk uses
      it to recover the last bit shifted out (the new C flag). */
   assign( tmpSubSh,
           unop(getres,
                binop(shift,
                      mkexpr(tmpL),
                      binop(Iop_And8,
                            binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
                            mask))) );

   /* NOTE: the 32-bit shift op tags are used here even for sz==2;
      presumably setFlags_DEP1_DEP2_shift accounts for the real width
      via 'ty' -- see its definition. */
   setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl32 : Iop_Sar32,
                              tmpRes, tmpSubSh, ty, tmpSH );

   /* Put result back. */

   if (epartIsReg(modrm)) {
      putIReg(sz, eregOfRM(modrm), mkexpr(tmpRes));
   } else {
      storeLE( mkexpr(addr), mkexpr(tmpRes) );
   }

   /* A literal shift amount occupies one further insn byte which has
      not yet been accounted for. */
   if (amt_is_literal) delta++;
   return delta;
}
6731
6732
6733 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not
6734 required. */
6735
/* Which bit-test operation: BT (BtOpNone, test only), BTS (set),
   BTR (reset/clear), BTC (complement). */
typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;
6737
nameBtOp(BtOp op)6738 static const HChar* nameBtOp ( BtOp op )
6739 {
6740 switch (op) {
6741 case BtOpNone: return "";
6742 case BtOpSet: return "s";
6743 case BtOpReset: return "r";
6744 case BtOpComp: return "c";
6745 default: vpanic("nameBtOp(x86)");
6746 }
6747 }
6748
6749
static
UInt dis_bt_G_E ( const VexAbiInfo* vbi,
                  UChar sorb, Bool locked, Int sz, Int delta, BtOp op )
{
   /* Handle BT/BTS/BTR/BTC Gv, Ev.  'op' selects the variant
      (BtOpNone = plain BT, test only).  'locked' requests an atomic
      (CAS-based) update for the memory-destination case.  Returns
      delta advanced past the decoded bytes.  The register-destination
      case is handled by spilling the register to just below the
      client's stack pointer and operating on it in memory. */
   HChar  dis_buf[50];
   UChar  modrm;
   Int    len;
   IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
          t_addr1, t_esp, t_mask, t_new;

   vassert(sz == 2 || sz == 4);

   t_fetched = t_bitno0 = t_bitno1 = t_bitno2
             = t_addr0 = t_addr1 = t_esp
             = t_mask = t_new = IRTemp_INVALID;

   t_fetched = newTemp(Ity_I8);
   t_new     = newTemp(Ity_I8);
   t_bitno0  = newTemp(Ity_I32);
   t_bitno1  = newTemp(Ity_I32);
   t_bitno2  = newTemp(Ity_I8);
   t_addr1   = newTemp(Ity_I32);
   modrm     = getIByte(delta);

   /* Bit number comes from the G register, sign-widened to 32 bits
      (for memory destinations it may be negative and index backwards). */
   assign( t_bitno0, widenSto32(getIReg(sz, gregOfRM(modrm))) );

   if (epartIsReg(modrm)) {
      delta++;
      /* Get it onto the client's stack. */
      t_esp = newTemp(Ity_I32);
      t_addr0 = newTemp(Ity_I32);

      /* For the choice of the value 128, see comment in dis_bt_G_E in
         guest_amd64_toIR.c.  We point out here only that 128 is
         fast-cased in Memcheck and is > 0, so seems like a good
         choice. */
      vassert(vbi->guest_stack_redzone_size == 0);
      assign( t_esp, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(128)) );
      putIReg(4, R_ESP, mkexpr(t_esp));

      storeLE( mkexpr(t_esp), getIReg(sz, eregOfRM(modrm)) );

      /* Make t_addr0 point at it. */
      assign( t_addr0, mkexpr(t_esp) );

      /* Mask out upper bits of the shift amount, since we're doing a
         reg. */
      assign( t_bitno1, binop(Iop_And32,
                              mkexpr(t_bitno0),
                              mkU32(sz == 4 ? 31 : 15)) );

   } else {
      t_addr0 = disAMode ( &len, sorb, delta, dis_buf );
      delta += len;
      assign( t_bitno1, mkexpr(t_bitno0) );
   }

   /* At this point: t_addr0 is the address being operated on.  If it
      was a reg, we will have pushed it onto the client's stack.
      t_bitno1 is the bit number, suitably masked in the case of a
      reg.  */

   /* Now the main sequence. */
   /* Arithmetic shift right by 3 converts a (possibly negative) bit
      index into a byte offset from t_addr0. */
   assign( t_addr1,
           binop(Iop_Add32,
                 mkexpr(t_addr0),
                 binop(Iop_Sar32, mkexpr(t_bitno1), mkU8(3))) );

   /* t_addr1 now holds effective address */

   assign( t_bitno2,
           unop(Iop_32to8,
                binop(Iop_And32, mkexpr(t_bitno1), mkU32(7))) );

   /* t_bitno2 contains offset of bit within byte */

   if (op != BtOpNone) {
      t_mask = newTemp(Ity_I8);
      assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
   }

   /* t_mask is now a suitable byte mask */

   assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );

   if (op != BtOpNone) {
      switch (op) {
         case BtOpSet:
            assign( t_new,
                    binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
            break;
         case BtOpComp:
            assign( t_new,
                    binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
            break;
         case BtOpReset:
            assign( t_new,
                    binop(Iop_And8, mkexpr(t_fetched),
                                    unop(Iop_Not8, mkexpr(t_mask))) );
            break;
         default:
            vpanic("dis_bt_G_E(x86)");
      }
      /* Only a true memory destination needs the atomic update; the
         spilled-register case is private to this thread's stack. */
      if (locked && !epartIsReg(modrm)) {
         casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
                                 mkexpr(t_new)/*new*/,
                                 guest_EIP_curr_instr );
      } else {
         storeLE( mkexpr(t_addr1), mkexpr(t_new) );
      }
   }

   /* Side effect done; now get selected bit into Carry flag */
   /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop(Iop_And32,
                  binop(Iop_Shr32,
                        unop(Iop_8Uto32, mkexpr(t_fetched)),
                        mkexpr(t_bitno2)),
                  mkU32(1)))
       );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

   /* Move reg operand from stack back to reg */
   if (epartIsReg(modrm)) {
      /* t_esp still points at it. */
      putIReg(sz, eregOfRM(modrm), loadLE(szToITy(sz), mkexpr(t_esp)) );
      putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t_esp), mkU32(128)) );
   }

   DIP("bt%s%c %s, %s\n",
       nameBtOp(op), nameISize(sz), nameIReg(sz, gregOfRM(modrm)),
       ( epartIsReg(modrm) ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ) );

   return delta;
}
6891
6892
6893
6894 /* Handle BSF/BSR. Only v-size seems necessary. */
static
UInt dis_bs_E_G ( UChar sorb, Int sz, Int delta, Bool fwds )
{
   /* BSF (fwds==True) / BSR (fwds==False) E(reg-or-mem), G(reg).
      Sets Z iff the source is zero, in which case the destination
      register is left unchanged (matching observed VIA Nehemiah
      behaviour -- see note below).  Returns delta past the decoded
      bytes. */
   Bool   isReg;
   UChar  modrm;
   HChar  dis_buf[50];

   IRType ty  = szToITy(sz);
   IRTemp src = newTemp(ty);
   IRTemp dst = newTemp(ty);

   IRTemp src32 = newTemp(Ity_I32);
   IRTemp dst32 = newTemp(Ity_I32);
   IRTemp srcB  = newTemp(Ity_I1);

   vassert(sz == 4 || sz == 2);

   modrm = getIByte(delta);

   isReg = epartIsReg(modrm);
   if (isReg) {
      delta++;
      assign( src, getIReg(sz, eregOfRM(modrm)) );
   } else {
      Int    len;
      IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
      delta += len;
      assign( src, loadLE(ty, mkexpr(addr)) );
   }

   DIP("bs%c%c %s, %s\n",
       fwds ? 'f' : 'r', nameISize(sz),
       ( isReg ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ),
       nameIReg(sz, gregOfRM(modrm)));

   /* Generate a bool expression which is zero iff the original is
      zero, and nonzero otherwise.  Ask for a CmpNE version which, if
      instrumented by Memcheck, is instrumented expensively, since
      this may be used on the output of a preceding movmskb insn,
      which has been known to be partially defined, and in need of
      careful handling. */
   assign( srcB, binop(mkSizedOp(ty,Iop_ExpCmpNE8),
                       mkexpr(src), mkU(ty,0)) );

   /* Flags: Z is 1 iff source value is zero.  All others
      are undefined -- we force them to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            IRExpr_ITE( mkexpr(srcB),
                        /* src!=0 */
                        mkU32(0),
                        /* src==0 */
                        mkU32(X86G_CC_MASK_Z)
                        )
       ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

   /* Result: iff source value is zero, we can't use
      Iop_Clz32/Iop_Ctz32 as they have no defined result in that case.
      But anyway, Intel x86 semantics say the result is undefined in
      such situations.  Hence handle the zero case specially. */

   /* Bleh.  What we compute:

          bsf32:  if src == 0 then 0 else  Ctz32(src)
          bsr32:  if src == 0 then 0 else  31 - Clz32(src)

          bsf16:  if src == 0 then 0 else  Ctz32(16Uto32(src))
          bsr16:  if src == 0 then 0 else  31 - Clz32(16Uto32(src))

      First, widen src to 32 bits if it is not already.

      Postscript 15 Oct 04: it seems that at least VIA Nehemiah leaves the
      dst register unchanged when src == 0.  Hence change accordingly.
   */
   if (sz == 2)
      assign( src32, unop(Iop_16Uto32, mkexpr(src)) );
   else
      assign( src32, mkexpr(src) );

   /* The main computation, guarding against zero. */
   assign( dst32,
           IRExpr_ITE(
              mkexpr(srcB),
              /* src != 0 */
              fwds ? unop(Iop_Ctz32, mkexpr(src32))
                   : binop(Iop_Sub32,
                           mkU32(31),
                           unop(Iop_Clz32, mkexpr(src32))),
              /* src == 0 -- leave dst unchanged */
              widenUto32( getIReg( sz, gregOfRM(modrm) ) )
           )
         );

   /* Narrow the result back to the operand size if needed. */
   if (sz == 2)
      assign( dst, unop(Iop_32to16, mkexpr(dst32)) );
   else
      assign( dst, mkexpr(dst32) );

   /* dump result back */
   putIReg( sz, gregOfRM(modrm), mkexpr(dst) );

   return delta;
}
7003
7004
7005 static
codegen_xchg_eAX_Reg(Int sz,Int reg)7006 void codegen_xchg_eAX_Reg ( Int sz, Int reg )
7007 {
7008 IRType ty = szToITy(sz);
7009 IRTemp t1 = newTemp(ty);
7010 IRTemp t2 = newTemp(ty);
7011 vassert(sz == 2 || sz == 4);
7012 assign( t1, getIReg(sz, R_EAX) );
7013 assign( t2, getIReg(sz, reg) );
7014 putIReg( sz, R_EAX, mkexpr(t2) );
7015 putIReg( sz, reg, mkexpr(t1) );
7016 DIP("xchg%c %s, %s\n",
7017 nameISize(sz), nameIReg(sz, R_EAX), nameIReg(sz, reg));
7018 }
7019
7020
7021 static
codegen_SAHF(void)7022 void codegen_SAHF ( void )
7023 {
7024 /* Set the flags to:
7025 (x86g_calculate_flags_all() & X86G_CC_MASK_O) -- retain the old O flag
7026 | (%AH & (X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
7027 |X86G_CC_MASK_P|X86G_CC_MASK_C)
7028 */
7029 UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
7030 |X86G_CC_MASK_C|X86G_CC_MASK_P;
7031 IRTemp oldflags = newTemp(Ity_I32);
7032 assign( oldflags, mk_x86g_calculate_eflags_all() );
7033 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
7034 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
7035 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
7036 stmt( IRStmt_Put( OFFB_CC_DEP1,
7037 binop(Iop_Or32,
7038 binop(Iop_And32, mkexpr(oldflags), mkU32(X86G_CC_MASK_O)),
7039 binop(Iop_And32,
7040 binop(Iop_Shr32, getIReg(4, R_EAX), mkU8(8)),
7041 mkU32(mask_SZACP))
7042 )
7043 ));
7044 /* Set NDEP even though it isn't used. This makes redundant-PUT
7045 elimination of previous stores to this field work better. */
7046 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
7047 }
7048
7049
7050 static
codegen_LAHF(void)7051 void codegen_LAHF ( void )
7052 {
7053 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
7054 IRExpr* eax_with_hole;
7055 IRExpr* new_byte;
7056 IRExpr* new_eax;
7057 UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
7058 |X86G_CC_MASK_C|X86G_CC_MASK_P;
7059
7060 IRTemp flags = newTemp(Ity_I32);
7061 assign( flags, mk_x86g_calculate_eflags_all() );
7062
7063 eax_with_hole
7064 = binop(Iop_And32, getIReg(4, R_EAX), mkU32(0xFFFF00FF));
7065 new_byte
7066 = binop(Iop_Or32, binop(Iop_And32, mkexpr(flags), mkU32(mask_SZACP)),
7067 mkU32(1<<1));
7068 new_eax
7069 = binop(Iop_Or32, eax_with_hole,
7070 binop(Iop_Shl32, new_byte, mkU8(8)));
7071 putIReg(4, R_EAX, new_eax);
7072 }
7073
7074
static
UInt dis_cmpxchg_G_E ( UChar       sorb,
                       Bool        locked,
                       Int         size,
                       Int         delta0 )
{
   /* CMPXCHG G,E: compare %EAX with E (dest); flags are set from the
      comparison.  If equal, G (src) is written to E; otherwise E is
      loaded into %EAX.  Returns delta0 advanced past the decoded
      bytes. */
   HChar dis_buf[50];
   Int   len;

   IRType ty    = szToITy(size);
   IRTemp acc   = newTemp(ty);
   IRTemp src   = newTemp(ty);
   IRTemp dest  = newTemp(ty);
   IRTemp dest2 = newTemp(ty);
   IRTemp acc2  = newTemp(ty);
   IRTemp cond  = newTemp(Ity_I1);
   IRTemp addr  = IRTemp_INVALID;
   UChar  rm    = getUChar(delta0);

   /* There are 3 cases to consider:

      reg-reg: ignore any lock prefix, generate sequence based
               on ITE

      reg-mem, not locked: ignore any lock prefix, generate sequence
                           based on ITE

      reg-mem, locked: use IRCAS
   */
   if (epartIsReg(rm)) {
      /* case 1 */
      assign( dest, getIReg(size, eregOfRM(rm)) );
      delta0++;
      assign( src, getIReg(size, gregOfRM(rm)) );
      assign( acc, getIReg(size, R_EAX) );
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_x86g_calculate_condition(X86CondZ) );
      /* On success (Z set) the dest takes src and acc is unchanged;
         on failure dest is unchanged and acc takes the old dest. */
      assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
      assign( acc2,  IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIReg(size, R_EAX, mkexpr(acc2));
      putIReg(size, eregOfRM(rm), mkexpr(dest2));
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIReg(size,gregOfRM(rm)),
                               nameIReg(size,eregOfRM(rm)) );
   }
   else if (!epartIsReg(rm) && !locked) {
      /* case 2 */
      addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( dest, loadLE(ty, mkexpr(addr)) );
      delta0 += len;
      assign( src, getIReg(size, gregOfRM(rm)) );
      assign( acc, getIReg(size, R_EAX) );
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_x86g_calculate_condition(X86CondZ) );
      assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
      assign( acc2,  IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIReg(size, R_EAX, mkexpr(acc2));
      storeLE( mkexpr(addr), mkexpr(dest2) );
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIReg(size,gregOfRM(rm)), dis_buf);
   }
   else if (!epartIsReg(rm) && locked) {
      /* case 3 */
      /* src is new value.  acc is expected value.  dest is old value.
         Compute success from the output of the IRCAS, and steer the
         new value for EAX accordingly: in case of success, EAX is
         unchanged. */
      addr = disAMode ( &len, sorb, delta0, dis_buf );
      delta0 += len;
      assign( src, getIReg(size, gregOfRM(rm)) );
      assign( acc, getIReg(size, R_EAX) );
      stmt( IRStmt_CAS(
         mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
                  NULL, mkexpr(acc), NULL, mkexpr(src) )
      ));
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_x86g_calculate_condition(X86CondZ) );
      assign( acc2,  IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIReg(size, R_EAX, mkexpr(acc2));
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIReg(size,gregOfRM(rm)), dis_buf);
   }
   else vassert(0);

   return delta0;
}
7161
7162
7163 /* Handle conditional move instructions of the form
7164 cmovcc E(reg-or-mem), G(reg)
7165
7166 E(src) is reg-or-mem
7167 G(dst) is reg.
7168
7169 If E is reg, --> GET %E, tmps
7170 GET %G, tmpd
7171 CMOVcc tmps, tmpd
7172 PUT tmpd, %G
7173
7174 If E is mem --> (getAddr E) -> tmpa
7175 LD (tmpa), tmps
7176 GET %G, tmpd
7177 CMOVcc tmps, tmpd
7178 PUT tmpd, %G
7179 */
7180 static
dis_cmov_E_G(UChar sorb,Int sz,X86Condcode cond,Int delta0)7181 UInt dis_cmov_E_G ( UChar sorb,
7182 Int sz,
7183 X86Condcode cond,
7184 Int delta0 )
7185 {
7186 UChar rm = getIByte(delta0);
7187 HChar dis_buf[50];
7188 Int len;
7189
7190 IRType ty = szToITy(sz);
7191 IRTemp tmps = newTemp(ty);
7192 IRTemp tmpd = newTemp(ty);
7193
7194 if (epartIsReg(rm)) {
7195 assign( tmps, getIReg(sz, eregOfRM(rm)) );
7196 assign( tmpd, getIReg(sz, gregOfRM(rm)) );
7197
7198 putIReg(sz, gregOfRM(rm),
7199 IRExpr_ITE( mk_x86g_calculate_condition(cond),
7200 mkexpr(tmps),
7201 mkexpr(tmpd) )
7202 );
7203 DIP("cmov%c%s %s,%s\n", nameISize(sz),
7204 name_X86Condcode(cond),
7205 nameIReg(sz,eregOfRM(rm)),
7206 nameIReg(sz,gregOfRM(rm)));
7207 return 1+delta0;
7208 }
7209
7210 /* E refers to memory */
7211 {
7212 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
7213 assign( tmps, loadLE(ty, mkexpr(addr)) );
7214 assign( tmpd, getIReg(sz, gregOfRM(rm)) );
7215
7216 putIReg(sz, gregOfRM(rm),
7217 IRExpr_ITE( mk_x86g_calculate_condition(cond),
7218 mkexpr(tmps),
7219 mkexpr(tmpd) )
7220 );
7221
7222 DIP("cmov%c%s %s,%s\n", nameISize(sz),
7223 name_X86Condcode(cond),
7224 dis_buf,
7225 nameIReg(sz,gregOfRM(rm)));
7226 return len+delta0;
7227 }
7228 }
7229
7230
static
UInt dis_xadd_G_E ( UChar sorb, Bool locked, Int sz, Int delta0,
                    Bool* decodeOK )
{
   /* XADD G,E: exchange-and-add.  E becomes E+G, G receives the old
      value of E; flags are set from the addition.  Sets *decodeOK
      and returns delta0 advanced past the decoded bytes. */
   Int   len;
   UChar rm = getIByte(delta0);
   HChar dis_buf[50];

   IRType ty    = szToITy(sz);
   IRTemp tmpd  = newTemp(ty);   /* old value of E (the destination) */
   IRTemp tmpt0 = newTemp(ty);   /* value of G */
   IRTemp tmpt1 = newTemp(ty);   /* the sum E+G */

   /* There are 3 cases to consider:

      reg-reg: ignore any lock prefix,
               generate 'naive' (non-atomic) sequence

      reg-mem, not locked: ignore any lock prefix, generate 'naive'
                           (non-atomic) sequence

      reg-mem, locked: use IRCAS
   */

   if (epartIsReg(rm)) {
      /* case 1 */
      assign( tmpd,  getIReg(sz, eregOfRM(rm)));
      assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIReg(sz, eregOfRM(rm), mkexpr(tmpt1));
      putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIReg(sz,gregOfRM(rm)),
                         nameIReg(sz,eregOfRM(rm)));
      *decodeOK = True;
      return 1+delta0;
   }
   else if (!epartIsReg(rm) && !locked) {
      /* case 2 */
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( tmpd,  loadLE(ty, mkexpr(addr)) );
      assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      storeLE( mkexpr(addr), mkexpr(tmpt1) );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
      *decodeOK = True;
      return len+delta0;
   }
   else if (!epartIsReg(rm) && locked) {
      /* case 3 */
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( tmpd,  loadLE(ty, mkexpr(addr)) );
      assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      /* Atomic read-modify-write: expect the loaded old value to
         still be there; restart the insn otherwise. */
      casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
                           mkexpr(tmpt1)/*newVal*/, guest_EIP_curr_instr );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
      *decodeOK = True;
      return len+delta0;
   }
   /*UNREACHED*/
   vassert(0);
}
7304
7305 /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
7306
7307 static
dis_mov_Ew_Sw(UChar sorb,Int delta0)7308 UInt dis_mov_Ew_Sw ( UChar sorb, Int delta0 )
7309 {
7310 Int len;
7311 IRTemp addr;
7312 UChar rm = getIByte(delta0);
7313 HChar dis_buf[50];
7314
7315 if (epartIsReg(rm)) {
7316 putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
7317 DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
7318 return 1+delta0;
7319 } else {
7320 addr = disAMode ( &len, sorb, delta0, dis_buf );
7321 putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
7322 DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
7323 return len+delta0;
7324 }
7325 }
7326
7327 /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
7328 dst is ireg and sz==4, zero out top half of it. */
7329
7330 static
dis_mov_Sw_Ew(UChar sorb,Int sz,Int delta0)7331 UInt dis_mov_Sw_Ew ( UChar sorb,
7332 Int sz,
7333 Int delta0 )
7334 {
7335 Int len;
7336 IRTemp addr;
7337 UChar rm = getIByte(delta0);
7338 HChar dis_buf[50];
7339
7340 vassert(sz == 2 || sz == 4);
7341
7342 if (epartIsReg(rm)) {
7343 if (sz == 4)
7344 putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
7345 else
7346 putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
7347
7348 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
7349 return 1+delta0;
7350 } else {
7351 addr = disAMode ( &len, sorb, delta0, dis_buf );
7352 storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
7353 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
7354 return len+delta0;
7355 }
7356 }
7357
7358
7359 static
dis_push_segreg(UInt sreg,Int sz)7360 void dis_push_segreg ( UInt sreg, Int sz )
7361 {
7362 IRTemp t1 = newTemp(Ity_I16);
7363 IRTemp ta = newTemp(Ity_I32);
7364 vassert(sz == 2 || sz == 4);
7365
7366 assign( t1, getSReg(sreg) );
7367 assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
7368 putIReg(4, R_ESP, mkexpr(ta));
7369 storeLE( mkexpr(ta), mkexpr(t1) );
7370
7371 DIP("push%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg));
7372 }
7373
7374 static
dis_pop_segreg(UInt sreg,Int sz)7375 void dis_pop_segreg ( UInt sreg, Int sz )
7376 {
7377 IRTemp t1 = newTemp(Ity_I16);
7378 IRTemp ta = newTemp(Ity_I32);
7379 vassert(sz == 2 || sz == 4);
7380
7381 assign( ta, getIReg(4, R_ESP) );
7382 assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
7383
7384 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
7385 putSReg( sreg, mkexpr(t1) );
7386 DIP("pop%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg));
7387 }
7388
7389 static
dis_ret(DisResult * dres,UInt d32)7390 void dis_ret ( /*MOD*/DisResult* dres, UInt d32 )
7391 {
7392 IRTemp t1 = newTemp(Ity_I32);
7393 IRTemp t2 = newTemp(Ity_I32);
7394 assign(t1, getIReg(4,R_ESP));
7395 assign(t2, loadLE(Ity_I32,mkexpr(t1)));
7396 putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(4+d32)));
7397 jmp_treg(dres, Ijk_Ret, t2);
7398 vassert(dres->whatNext == Dis_StopHere);
7399 }
7400
7401 /*------------------------------------------------------------*/
7402 /*--- SSE/SSE2/SSE3 helpers ---*/
7403 /*------------------------------------------------------------*/
7404
7405 /* Indicates whether the op requires a rounding-mode argument. Note
7406 that this covers only vector floating point arithmetic ops, and
7407 omits the scalar ones that need rounding modes. Note also that
7408 inconsistencies here will get picked up later by the IR sanity
7409 checker, so this isn't correctness-critical. */
requiresRMode(IROp op)7410 static Bool requiresRMode ( IROp op )
7411 {
7412 switch (op) {
7413 /* 128 bit ops */
7414 case Iop_Add32Fx4: case Iop_Sub32Fx4:
7415 case Iop_Mul32Fx4: case Iop_Div32Fx4:
7416 case Iop_Add64Fx2: case Iop_Sub64Fx2:
7417 case Iop_Mul64Fx2: case Iop_Div64Fx2:
7418 return True;
7419 default:
7420 break;
7421 }
7422 return False;
7423 }
7424
7425
7426 /* Worker function; do not call directly.
7427 Handles full width G = G `op` E and G = (not G) `op` E.
7428 */
7429
/* Returns the updated delta (offset of the next insn byte). */
static UInt dis_SSE_E_to_G_all_wrk (
               UChar sorb, Int delta,
               const HChar* opname, IROp op,
               Bool invertG
            )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getIByte(delta);
   /* The G (destination) operand, complemented first if requested
      (used for ANDN-style ops). */
   IRExpr* gpart
      = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRM(rm)))
                : getXMMReg(gregOfRM(rm));
   if (epartIsReg(rm)) {
      /* E is an XMM register. */
      putXMMReg(
         gregOfRM(rm),
         requiresRMode(op)
            ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                    gpart,
                    getXMMReg(eregOfRM(rm)))
            : binop(op, gpart,
                    getXMMReg(eregOfRM(rm)))
      );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      /* E is memory: a full 16-byte load. */
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      putXMMReg(
         gregOfRM(rm),
         requiresRMode(op)
            ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                    gpart,
                    loadLE(Ity_V128, mkexpr(addr)))
            : binop(op, gpart,
                    loadLE(Ity_V128, mkexpr(addr)))
      );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}
7474
7475
7476 /* All lanes SSE binary operation, G = G `op` E. */
7477
/* Returns the updated delta. */
static
UInt dis_SSE_E_to_G_all ( UChar sorb, Int delta, const HChar* opname, IROp op )
{
   return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, False );
}
7483
7484 /* All lanes SSE binary operation, G = (not G) `op` E. */
7485
/* Returns the updated delta. */
static
UInt dis_SSE_E_to_G_all_invG ( UChar sorb, Int delta,
                               const HChar* opname, IROp op )
{
   return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, True );
}
7492
7493
7494 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
7495
/* Returns the updated delta. */
static UInt dis_SSE_E_to_G_lo32 ( UChar sorb, Int delta,
                                  const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getIByte(delta);
   IRExpr* gpart = getXMMReg(gregOfRM(rm));
   if (epartIsReg(rm)) {
      /* E is an XMM register; op is given both full 128-bit values. */
      putXMMReg( gregOfRM(rm),
                 binop(op, gpart,
                           getXMMReg(eregOfRM(rm))) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      /* We can only do a 32-bit memory read, so the upper 3/4 of the
         E operand needs to be made simply of zeroes. */
      IRTemp epart = newTemp(Ity_V128);
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( epart, unop( Iop_32UtoV128,
                           loadLE(Ity_I32, mkexpr(addr))) );
      putXMMReg( gregOfRM(rm),
                 binop(op, gpart, mkexpr(epart)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}
7527
7528
7529 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
7530
/* Returns the updated delta. */
static UInt dis_SSE_E_to_G_lo64 ( UChar sorb, Int delta,
                                  const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getIByte(delta);
   IRExpr* gpart = getXMMReg(gregOfRM(rm));
   if (epartIsReg(rm)) {
      /* E is an XMM register; op is given both full 128-bit values. */
      putXMMReg( gregOfRM(rm),
                 binop(op, gpart,
                           getXMMReg(eregOfRM(rm))) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      /* We can only do a 64-bit memory read, so the upper half of the
         E operand needs to be made simply of zeroes. */
      IRTemp epart = newTemp(Ity_V128);
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( epart, unop( Iop_64UtoV128,
                           loadLE(Ity_I64, mkexpr(addr))) );
      putXMMReg( gregOfRM(rm),
                 binop(op, gpart, mkexpr(epart)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}
7562
7563
7564 /* All lanes unary SSE operation, G = op(E). */
7565
/* Returns the updated delta. */
static UInt dis_SSE_E_to_G_unary_all (
               UChar sorb, Int delta,
               const HChar* opname, IROp op
            )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getIByte(delta);
   // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
   // up in the usual way.
   Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
   if (epartIsReg(rm)) {
      /* E is an XMM register. */
      IRExpr* src = getXMMReg(eregOfRM(rm));
      /* XXXROUNDINGFIXME */
      IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
                              : unop(op, src);
      putXMMReg( gregOfRM(rm), res );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      /* E is memory: a full 16-byte load. */
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      IRExpr* src = loadLE(Ity_V128, mkexpr(addr));
      /* XXXROUNDINGFIXME */
      IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
                              : unop(op, src);
      putXMMReg( gregOfRM(rm), res );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}
7601
7602
7603 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */
7604
/* Returns the updated delta. */
static UInt dis_SSE_E_to_G_unary_lo32 (
               UChar sorb, Int delta,
               const HChar* opname, IROp op
            )
{
   /* First we need to get the old G value and patch the low 32 bits
      of the E operand into it.  Then apply op and write back to G. */
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getIByte(delta);
   IRTemp  oldG0 = newTemp(Ity_V128);
   IRTemp  oldG1 = newTemp(Ity_V128);

   assign( oldG0, getXMMReg(gregOfRM(rm)) );

   if (epartIsReg(rm)) {
      /* oldG1 = G with E's lane 0 substituted into its low 32 bits. */
      assign( oldG1,
              binop( Iop_SetV128lo32,
                     mkexpr(oldG0),
                     getXMMRegLane32(eregOfRM(rm), 0)) );
      putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      /* Same, but the low 32 bits come from memory. */
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( oldG1,
              binop( Iop_SetV128lo32,
                     mkexpr(oldG0),
                     loadLE(Ity_I32, mkexpr(addr)) ));
      putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}
7644
7645
7646 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */
7647
/* Returns the updated delta. */
static UInt dis_SSE_E_to_G_unary_lo64 (
               UChar sorb, Int delta,
               const HChar* opname, IROp op
            )
{
   /* First we need to get the old G value and patch the low 64 bits
      of the E operand into it.  Then apply op and write back to G. */
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getIByte(delta);
   IRTemp  oldG0 = newTemp(Ity_V128);
   IRTemp  oldG1 = newTemp(Ity_V128);

   assign( oldG0, getXMMReg(gregOfRM(rm)) );

   if (epartIsReg(rm)) {
      /* oldG1 = G with E's lane 0 substituted into its low 64 bits. */
      assign( oldG1,
              binop( Iop_SetV128lo64,
                     mkexpr(oldG0),
                     getXMMRegLane64(eregOfRM(rm), 0)) );
      putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      /* Same, but the low 64 bits come from memory. */
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( oldG1,
              binop( Iop_SetV128lo64,
                     mkexpr(oldG0),
                     loadLE(Ity_I64, mkexpr(addr)) ));
      putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}
7687
7688
7689 /* SSE integer binary operation:
7690 G = G `op` E (eLeft == False)
7691 G = E `op` G (eLeft == True)
7692 */
/* Returns the updated delta.  eLeft selects the operand order, for
   non-commutative ops whose Intel operand order is E `op` G. */
static UInt dis_SSEint_E_to_G(
               UChar sorb, Int delta,
               const HChar* opname, IROp op,
               Bool   eLeft
            )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getIByte(delta);
   IRExpr* gpart = getXMMReg(gregOfRM(rm));
   IRExpr* epart = NULL;
   if (epartIsReg(rm)) {
      /* E is an XMM register. */
      epart = getXMMReg(eregOfRM(rm));
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      delta += 1;
   } else {
      /* E is memory: a full 16-byte load. */
      addr  = disAMode ( &alen, sorb, delta, dis_buf );
      epart = loadLE(Ity_V128, mkexpr(addr));
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      delta += alen;
   }
   putXMMReg( gregOfRM(rm),
              eLeft ? binop(op, epart, gpart)
                    : binop(op, gpart, epart) );
   return delta;
}
7724
7725
7726 /* Helper for doing SSE FP comparisons. */
7727
/* Map an SSE comparison immediate (imm8), lane-coverage flag and
   operand size to an IROp, writing the result to *op.  Predicates
   4..7 are the logical negations of predicates 0..3, so they are
   folded down to 0..3 and *needNot is set to indicate that the
   comparison result must be complemented afterwards. */
static void findSSECmpOp ( Bool* needNot, IROp* op,
                           Int imm8, Bool all_lanes, Int sz )
{
   /* Predicate tables, indexed by the folded imm8 (0..3):
      EQ, LT, LE, UNORD. */
   static const IROp ops32_all[4]
      = { Iop_CmpEQ32Fx4,  Iop_CmpLT32Fx4,  Iop_CmpLE32Fx4,  Iop_CmpUN32Fx4  };
   static const IROp ops32_lo0[4]
      = { Iop_CmpEQ32F0x4, Iop_CmpLT32F0x4, Iop_CmpLE32F0x4, Iop_CmpUN32F0x4 };
   static const IROp ops64_all[4]
      = { Iop_CmpEQ64Fx2,  Iop_CmpLT64Fx2,  Iop_CmpLE64Fx2,  Iop_CmpUN64Fx2  };
   static const IROp ops64_lo0[4]
      = { Iop_CmpEQ64F0x2, Iop_CmpLT64F0x2, Iop_CmpLE64F0x2, Iop_CmpUN64F0x2 };

   imm8 &= 7;
   *needNot = False;
   *op      = Iop_INVALID;
   if (imm8 >= 4) {
      *needNot = True;
      imm8 -= 4;
   }

   if (sz == 4) {
      *op = all_lanes ? ops32_all[imm8] : ops32_lo0[imm8];
      return;
   }
   if (sz == 8) {
      *op = all_lanes ? ops64_all[imm8] : ops64_lo0[imm8];
      return;
   }
   vpanic("findSSECmpOp(x86,guest)");
}
7777
7778 /* Handles SSE 32F/64F comparisons. */
7779
/* Returns the updated delta. */
static UInt dis_SSEcmp_E_to_G ( UChar sorb, Int delta,
                                const HChar* opname, Bool all_lanes, Int sz )
{
   HChar   dis_buf[50];
   Int     alen, imm8;
   IRTemp  addr;
   Bool    needNot = False;
   IROp    op      = Iop_INVALID;
   IRTemp  plain   = newTemp(Ity_V128);
   UChar   rm      = getIByte(delta);
   UShort  mask    = 0;
   vassert(sz == 4 || sz == 8);
   if (epartIsReg(rm)) {
      /* E is an XMM register.  The comparison immediate follows the
         modrm byte. */
      imm8 = getIByte(delta+1);
      findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
      assign( plain, binop(op, getXMMReg(gregOfRM(rm)),
                               getXMMReg(eregOfRM(rm))) );
      delta += 2;
      DIP("%s $%d,%s,%s\n", opname,
                            imm8,
                            nameXMMReg(eregOfRM(rm)),
                            nameXMMReg(gregOfRM(rm)) );
   } else {
      /* E is memory.  For the scalar ("lo") variants only the low
         sz bytes are loaded; the rest of the E value is zeroes. */
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      imm8 = getIByte(delta+alen);
      findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
      assign( plain,
              binop(
                 op,
                 getXMMReg(gregOfRM(rm)),
                   all_lanes  ? loadLE(Ity_V128, mkexpr(addr))
                 : sz == 8    ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
                 : /*sz==4*/    unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
              )
      );
      delta += alen+1;
      DIP("%s $%d,%s,%s\n", opname,
                            imm8,
                            dis_buf,
                            nameXMMReg(gregOfRM(rm)) );
   }

   /* Predicates 4..7 require the plain comparison result to be
      complemented: fully for the all-lanes variants, in the low lane
      only for the scalar variants. */
   if (needNot && all_lanes) {
      putXMMReg( gregOfRM(rm),
                 unop(Iop_NotV128, mkexpr(plain)) );
   }
   else
   if (needNot && !all_lanes) {
      mask = toUShort( sz==4 ? 0x000F : 0x00FF );
      putXMMReg( gregOfRM(rm),
                 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
   }
   else {
      putXMMReg( gregOfRM(rm), mkexpr(plain) );
   }

   return delta;
}
7838
7839
7840 /* Vector by scalar shift of G by the amount specified at the bottom
7841 of E. */
7842
dis_SSE_shiftG_byE(UChar sorb,Int delta,const HChar * opname,IROp op)7843 static UInt dis_SSE_shiftG_byE ( UChar sorb, Int delta,
7844 const HChar* opname, IROp op )
7845 {
7846 HChar dis_buf[50];
7847 Int alen, size;
7848 IRTemp addr;
7849 Bool shl, shr, sar;
7850 UChar rm = getIByte(delta);
7851 IRTemp g0 = newTemp(Ity_V128);
7852 IRTemp g1 = newTemp(Ity_V128);
7853 IRTemp amt = newTemp(Ity_I32);
7854 IRTemp amt8 = newTemp(Ity_I8);
7855 if (epartIsReg(rm)) {
7856 assign( amt, getXMMRegLane32(eregOfRM(rm), 0) );
7857 DIP("%s %s,%s\n", opname,
7858 nameXMMReg(eregOfRM(rm)),
7859 nameXMMReg(gregOfRM(rm)) );
7860 delta++;
7861 } else {
7862 addr = disAMode ( &alen, sorb, delta, dis_buf );
7863 assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
7864 DIP("%s %s,%s\n", opname,
7865 dis_buf,
7866 nameXMMReg(gregOfRM(rm)) );
7867 delta += alen;
7868 }
7869 assign( g0, getXMMReg(gregOfRM(rm)) );
7870 assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
7871
7872 shl = shr = sar = False;
7873 size = 0;
7874 switch (op) {
7875 case Iop_ShlN16x8: shl = True; size = 32; break;
7876 case Iop_ShlN32x4: shl = True; size = 32; break;
7877 case Iop_ShlN64x2: shl = True; size = 64; break;
7878 case Iop_SarN16x8: sar = True; size = 16; break;
7879 case Iop_SarN32x4: sar = True; size = 32; break;
7880 case Iop_ShrN16x8: shr = True; size = 16; break;
7881 case Iop_ShrN32x4: shr = True; size = 32; break;
7882 case Iop_ShrN64x2: shr = True; size = 64; break;
7883 default: vassert(0);
7884 }
7885
7886 if (shl || shr) {
7887 assign(
7888 g1,
7889 IRExpr_ITE(
7890 binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
7891 binop(op, mkexpr(g0), mkexpr(amt8)),
7892 mkV128(0x0000)
7893 )
7894 );
7895 } else
7896 if (sar) {
7897 assign(
7898 g1,
7899 IRExpr_ITE(
7900 binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
7901 binop(op, mkexpr(g0), mkexpr(amt8)),
7902 binop(op, mkexpr(g0), mkU8(size-1))
7903 )
7904 );
7905 } else {
7906 /*NOTREACHED*/
7907 vassert(0);
7908 }
7909
7910 putXMMReg( gregOfRM(rm), mkexpr(g1) );
7911 return delta;
7912 }
7913
7914
7915 /* Vector by scalar shift of E by an immediate byte. */
7916
/* Returns the updated delta.  The greg field of the modrm byte must
   be 2 (shift-right logical), 4 (shift-right arithmetic) or 6
   (shift-left), per the PS{RL,RA,LL}{W,D,Q} /ib encodings. */
static
UInt dis_SSE_shiftE_imm ( Int delta, const HChar* opname, IROp op )
{
   Bool    shl, shr, sar;
   UChar   rm   = getIByte(delta);
   IRTemp  e0   = newTemp(Ity_V128);
   IRTemp  e1   = newTemp(Ity_V128);
   UChar   amt, size;
   vassert(epartIsReg(rm));
   vassert(gregOfRM(rm) == 2
           || gregOfRM(rm) == 4 || gregOfRM(rm) == 6);
   amt = getIByte(delta+1);
   delta += 2;
   DIP("%s $%d,%s\n", opname,
                      (Int)amt,
                      nameXMMReg(eregOfRM(rm)) );
   assign( e0, getXMMReg(eregOfRM(rm)) );

   shl = shr = sar = False;
   size = 0;
   /* 'size' is the lane width in bits. */
   switch (op) {
      case Iop_ShlN16x8: shl = True; size = 16; break;
      case Iop_ShlN32x4: shl = True; size = 32; break;
      case Iop_ShlN64x2: shl = True; size = 64; break;
      case Iop_SarN16x8: sar = True; size = 16; break;
      case Iop_SarN32x4: sar = True; size = 32; break;
      case Iop_ShrN16x8: shr = True; size = 16; break;
      case Iop_ShrN32x4: shr = True; size = 32; break;
      case Iop_ShrN64x2: shr = True; size = 64; break;
      default: vassert(0);
   }

   if (shl || shr) {
      /* Logical shifts by >= the lane width give zero. */
      assign( e1, amt >= size
                     ? mkV128(0x0000)
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else
   if (sar) {
      /* Arithmetic shifts saturate to a shift by lanewidth-1. */
      assign( e1, amt >= size
                     ? binop(op, mkexpr(e0), mkU8(size-1))
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else {
      /*NOTREACHED*/
      vassert(0);
   }

   putXMMReg( eregOfRM(rm), mkexpr(e1) );
   return delta;
}
7968
7969
7970 /* Get the current SSE rounding mode. */
7971
/* Returns an I32 expression for the guest's current SSE rounding
   mode (the low 2 bits of the SSEROUND guest-state field). */
static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
{
   return binop( Iop_And32,
                 IRExpr_Get( OFFB_SSEROUND, Ity_I32 ),
                 mkU32(3) );
}
7978
/* Writes sseround (an I32 expression) to the SSEROUND guest-state
   field. */
static void put_sse_roundingmode ( IRExpr* sseround )
{
   vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
   stmt( IRStmt_Put( OFFB_SSEROUND, sseround ) );
}
7984
7985 /* Break a 128-bit value up into four 32-bit ints. */
7986
/* Splits t128 into four new I32 temps, *t3 being the most significant
   and *t0 the least.  All four out-params must be IRTemp_INVALID on
   entry. */
static void breakup128to32s ( IRTemp t128,
                              /*OUTs*/
                              IRTemp* t3, IRTemp* t2,
                              IRTemp* t1, IRTemp* t0 )
{
   IRTemp hi64 = newTemp(Ity_I64);
   IRTemp lo64 = newTemp(Ity_I64);
   assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
   assign( lo64, unop(Iop_V128to64,   mkexpr(t128)) );

   vassert(t0 && *t0 == IRTemp_INVALID);
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);

   *t0 = newTemp(Ity_I32);
   *t1 = newTemp(Ity_I32);
   *t2 = newTemp(Ity_I32);
   *t3 = newTemp(Ity_I32);
   assign( *t0, unop(Iop_64to32,   mkexpr(lo64)) );
   assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
   assign( *t2, unop(Iop_64to32,   mkexpr(hi64)) );
   assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
}
8011
8012 /* Construct a 128-bit value from four 32-bit ints. */
8013
/* Builds a V128 expression from four I32 temps; t3 supplies the most
   significant lane, t0 the least.  Inverse of breakup128to32s. */
static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2,
                              IRTemp t1, IRTemp t0 )
{
   return
      binop( Iop_64HLtoV128,
             binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
             binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
   );
}
8023
8024 /* Break a 64-bit value up into four 16-bit ints. */
8025
/* Splits t64 into four new I16 temps, *t3 being the most significant
   and *t0 the least.  All four out-params must be IRTemp_INVALID on
   entry. */
static void breakup64to16s ( IRTemp t64,
                             /*OUTs*/
                             IRTemp* t3, IRTemp* t2,
                             IRTemp* t1, IRTemp* t0 )
{
   IRTemp hi32 = newTemp(Ity_I32);
   IRTemp lo32 = newTemp(Ity_I32);
   assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
   assign( lo32, unop(Iop_64to32,   mkexpr(t64)) );

   vassert(t0 && *t0 == IRTemp_INVALID);
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);

   *t0 = newTemp(Ity_I16);
   *t1 = newTemp(Ity_I16);
   *t2 = newTemp(Ity_I16);
   *t3 = newTemp(Ity_I16);
   assign( *t0, unop(Iop_32to16,   mkexpr(lo32)) );
   assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
   assign( *t2, unop(Iop_32to16,   mkexpr(hi32)) );
   assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
}
8050
8051 /* Construct a 64-bit value from four 16-bit ints. */
8052
/* Builds an I64 expression from four I16 temps; t3 supplies the most
   significant lane, t0 the least.  Inverse of breakup64to16s. */
static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
                             IRTemp t1, IRTemp t0 )
{
   return
      binop( Iop_32HLto64,
             binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
             binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
   );
}
8062
8063 /* Generate IR to set the guest %EFLAGS from the pushfl-format image
8064 in the given 32-bit temporary. The flags that are set are: O S Z A
8065 C P D ID AC.
8066
8067 In all cases, code to set AC is generated. However, VEX actually
8068 ignores the AC value and so can optionally emit an emulation
8069 warning when it is enabled. In this routine, an emulation warning
8070 is only emitted if emit_AC_emwarn is True, in which case
8071 next_insn_EIP must be correct (this allows for correct code
8072 generation for popfl/popfw). If emit_AC_emwarn is False,
8073 next_insn_EIP is unimportant (this allows for easy if kludgey code
8074 generation for IRET.) */
8075
/* See the contract in the comment above: sets guest OSZACP (via the
   flags thunk), D, ID and AC from the pushfl-format word in t1. */
static
void set_EFLAGS_from_value ( IRTemp t1,
                             Bool   emit_AC_emwarn,
                             Addr32 next_insn_EIP )
{
   vassert(typeOfIRTemp(irsb->tyenv,t1) == Ity_I32);

   /* t1 is the flag word.  Mask out everything except OSZACP and set
      the flags thunk to X86G_CC_OP_COPY. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     binop(Iop_And32,
                           mkexpr(t1),
                           mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
                                  | X86G_CC_MASK_A | X86G_CC_MASK_Z
                                  | X86G_CC_MASK_S| X86G_CC_MASK_O )
                          )
                    )
       );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

   /* Also need to set the D flag, which is held in bit 10 of t1.
      If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
   stmt( IRStmt_Put(
            OFFB_DFLAG,
            IRExpr_ITE(
               unop(Iop_32to1,
                    binop(Iop_And32,
                          binop(Iop_Shr32, mkexpr(t1), mkU8(10)),
                          mkU32(1))),
               mkU32(0xFFFFFFFF),
               mkU32(1)))
       );

   /* Set the ID flag (bit 21 of t1). */
   stmt( IRStmt_Put(
            OFFB_IDFLAG,
            IRExpr_ITE(
               unop(Iop_32to1,
                    binop(Iop_And32,
                          binop(Iop_Shr32, mkexpr(t1), mkU8(21)),
                          mkU32(1))),
               mkU32(1),
               mkU32(0)))
       );

   /* And set the AC flag (bit 18 of t1).  If setting it to 1,
      possibly emit an emulation warning. */
   stmt( IRStmt_Put(
            OFFB_ACFLAG,
            IRExpr_ITE(
               unop(Iop_32to1,
                    binop(Iop_And32,
                          binop(Iop_Shr32, mkexpr(t1), mkU8(18)),
                          mkU32(1))),
               mkU32(1),
               mkU32(0)))
       );

   if (emit_AC_emwarn) {
      put_emwarn( mkU32(EmWarn_X86_acFlag) );
      stmt(
         IRStmt_Exit(
            binop( Iop_CmpNE32,
                   binop(Iop_And32, mkexpr(t1), mkU32(1<<18)),
                   mkU32(0) ),
            Ijk_EmWarn,
            IRConst_U32( next_insn_EIP ),
            OFFB_EIP
         )
      );
   }
}
8152
8153
8154 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
8155 values (aa,bb), computes, for each of the 4 16-bit lanes:
8156
8157 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
8158 */
/* Returns an I64 expression computing the PMULHRSW formula above on
   the four 16-bit lanes of (aax, bbx). */
static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
{
   IRTemp aa      = newTemp(Ity_I64);
   IRTemp bb      = newTemp(Ity_I64);
   IRTemp aahi32s = newTemp(Ity_I64);
   IRTemp aalo32s = newTemp(Ity_I64);
   IRTemp bbhi32s = newTemp(Ity_I64);
   IRTemp bblo32s = newTemp(Ity_I64);
   IRTemp rHi     = newTemp(Ity_I64);
   IRTemp rLo     = newTemp(Ity_I64);
   IRTemp one32x2 = newTemp(Ity_I64);
   assign(aa, aax);
   assign(bb, bbx);
   /* Sign-extend the 16-bit lanes to 32 bits, two lanes per I64:
      interleave each value with itself, then arithmetically shift
      each 32-bit lane right by 16. */
   assign( aahi32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
                 mkU8(16) ));
   assign( aalo32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
                 mkU8(16) ));
   assign( bbhi32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
                 mkU8(16) ));
   assign( bblo32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
                 mkU8(16) ));
   /* Constant 1 in each 32-bit lane, for the rounding step. */
   assign(one32x2, mkU64( (1ULL << 32) + 1 ));
   /* Per 32-bit lane: ((a*b) >>u 14) + 1, then >>u 1. */
   assign(
      rHi,
      binop(
         Iop_ShrN32x2,
         binop(
            Iop_Add32x2,
            binop(
               Iop_ShrN32x2,
               binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
               mkU8(14)
            ),
            mkexpr(one32x2)
         ),
         mkU8(1)
      )
   );
   assign(
      rLo,
      binop(
         Iop_ShrN32x2,
         binop(
            Iop_Add32x2,
            binop(
               Iop_ShrN32x2,
               binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
               mkU8(14)
            ),
            mkexpr(one32x2)
         ),
         mkU8(1)
      )
   );
   /* Narrow the four 32-bit results back to four 16-bit lanes. */
   return
      binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
}
8224
8225 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
8226 values (aa,bb), computes, for each lane:
8227
8228 if aa_lane < 0 then - bb_lane
8229 else if aa_lane > 0 then bb_lane
8230 else 0
8231 */
/* Returns an I64 expression computing the per-lane PSIGN formula
   above; laneszB is the lane size in bytes (1, 2 or 4). */
static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
{
   IRTemp aa       = newTemp(Ity_I64);
   IRTemp bb       = newTemp(Ity_I64);
   IRTemp zero     = newTemp(Ity_I64);
   IRTemp bbNeg    = newTemp(Ity_I64);
   IRTemp negMask  = newTemp(Ity_I64);
   IRTemp posMask  = newTemp(Ity_I64);
   IROp   opSub    = Iop_INVALID;
   IROp   opCmpGTS = Iop_INVALID;

   switch (laneszB) {
      case 1: opSub = Iop_Sub8x8;  opCmpGTS = Iop_CmpGT8Sx8;  break;
      case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
      case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
      default: vassert(0);
   }

   assign( aa,      aax );
   assign( bb,      bbx );
   assign( zero,    mkU64(0) );
   /* bbNeg = -bb; per-lane masks select bb, -bb or 0 depending on
      the sign of the corresponding aa lane. */
   assign( bbNeg,   binop(opSub,    mkexpr(zero), mkexpr(bb)) );
   assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
   assign( posMask, binop(opCmpGTS, mkexpr(aa),   mkexpr(zero)) );

   return
      binop(Iop_Or64,
            binop(Iop_And64, mkexpr(bb),    mkexpr(posMask)),
            binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );

}
8263
8264 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
8265 value aa, computes, for each lane
8266
8267 if aa < 0 then -aa else aa
8268
8269 Note that the result is interpreted as unsigned, so that the
8270 absolute value of the most negative signed input can be
8271 represented.
8272 */
/* Returns an I64 expression computing the per-lane absolute value of
   aax (see contract above); laneszB is the lane size in bytes. */
static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB )
{
   IRTemp aa      = newTemp(Ity_I64);
   IRTemp zero    = newTemp(Ity_I64);
   IRTemp aaNeg   = newTemp(Ity_I64);
   IRTemp negMask = newTemp(Ity_I64);
   IRTemp posMask = newTemp(Ity_I64);
   IROp   opSub   = Iop_INVALID;
   IROp   opSarN  = Iop_INVALID;

   switch (laneszB) {
      case 1: opSub = Iop_Sub8x8;  opSarN = Iop_SarN8x8;  break;
      case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
      case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
      default: vassert(0);
   }

   assign( aa, aax );
   /* Arithmetic shift by lanewidth-1 replicates each lane's sign bit,
      giving all-ones for negative lanes, all-zeroes otherwise. */
   assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
   assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
   assign( zero,    mkU64(0) );
   assign( aaNeg,   binop(opSub, mkexpr(zero), mkexpr(aa)) );
   return
      binop(Iop_Or64,
            binop(Iop_And64, mkexpr(aa),    mkexpr(posMask)),
            binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) );
}
8300
/* Concatenates hi64:lo64 and returns the middle 64 bits, skipping
   byteShift bytes from the bottom of lo64 (PALIGNR building block).
   Only shifts of 1..7 bytes are supported; the 0 and >=8 cases are
   handled by the caller. */
static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
                                        IRTemp lo64, Int byteShift )
{
   vassert(byteShift >= 1 && byteShift <= 7);
   return
      binop(Iop_Or64,
            binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
            binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
      );
}
8311
8312 /* Generate a SIGSEGV followed by a restart of the current instruction
8313 if effective_addr is not 16-aligned. This is required behaviour
8314 for some SSE3 instructions and all 128-bit SSSE3 instructions.
8315 This assumes that guest_RIP_curr_instr is set correctly! */
/* Emits a side-exit raising SIGSEGV, restarting at the current
   instruction, taken when the low 4 bits of effective_addr are
   nonzero (i.e. the address is not 16-aligned). */
static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr )
{
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE32,
               binop(Iop_And32,mkexpr(effective_addr),mkU32(0xF)),
               mkU32(0)),
         Ijk_SigSEGV,
         IRConst_U32(guest_EIP_curr_instr),
         OFFB_EIP
      )
   );
}
8329
8330
8331 /* Helper for deciding whether a given insn (starting at the opcode
8332 byte) may validly be used with a LOCK prefix. The following insns
8333 may be used with LOCK when their destination operand is in memory.
8334 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
8335
8336 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
8337 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
8338 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
   SBB      80 /3,  81 /3,  82 /x,  83 /3,  18, 19
8340 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
8341 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
8342 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
8343
8344 DEC FE /1, FF /1
8345 INC FE /0, FF /0
8346
8347 NEG F6 /3, F7 /3
8348 NOT F6 /2, F7 /2
8349
8350 XCHG 86, 87
8351
8352 BTC 0F BB, 0F BA /7
8353 BTR 0F B3, 0F BA /6
8354 BTS 0F AB, 0F BA /5
8355
8356 CMPXCHG 0F B0, 0F B1
8357 CMPXCHG8B 0F C7 /1
8358
8359 XADD 0F C0, 0F C1
8360
8361 ------------------------------
8362
8363 80 /0 = addb $imm8, rm8
8364 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
8365 82 /0 = addb $imm8, rm8
8366 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
8367
8368 00 = addb r8, rm8
8369 01 = addl r32, rm32 and addw r16, rm16
8370
8371 Same for ADD OR ADC SBB AND SUB XOR
8372
8373 FE /1 = dec rm8
8374 FF /1 = dec rm32 and dec rm16
8375
8376 FE /0 = inc rm8
8377 FF /0 = inc rm32 and inc rm16
8378
8379 F6 /3 = neg rm8
8380 F7 /3 = neg rm32 and neg rm16
8381
8382 F6 /2 = not rm8
8383 F7 /2 = not rm32 and not rm16
8384
8385 0F BB = btcw r16, rm16 and btcl r32, rm32
   0F BA /7 = btcw $imm8, rm16    and  btcl $imm8, rm32
8387
8388 Same for BTS, BTR
8389 */
can_be_used_with_LOCK_prefix(const UChar * opc)8390 static Bool can_be_used_with_LOCK_prefix ( const UChar* opc )
8391 {
8392 switch (opc[0]) {
8393 case 0x00: case 0x01: case 0x08: case 0x09:
8394 case 0x10: case 0x11: case 0x18: case 0x19:
8395 case 0x20: case 0x21: case 0x28: case 0x29:
8396 case 0x30: case 0x31:
8397 if (!epartIsReg(opc[1]))
8398 return True;
8399 break;
8400
8401 case 0x80: case 0x81: case 0x82: case 0x83:
8402 if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6
8403 && !epartIsReg(opc[1]))
8404 return True;
8405 break;
8406
8407 case 0xFE: case 0xFF:
8408 if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1
8409 && !epartIsReg(opc[1]))
8410 return True;
8411 break;
8412
8413 case 0xF6: case 0xF7:
8414 if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3
8415 && !epartIsReg(opc[1]))
8416 return True;
8417 break;
8418
8419 case 0x86: case 0x87:
8420 if (!epartIsReg(opc[1]))
8421 return True;
8422 break;
8423
8424 case 0x0F: {
8425 switch (opc[1]) {
8426 case 0xBB: case 0xB3: case 0xAB:
8427 if (!epartIsReg(opc[2]))
8428 return True;
8429 break;
8430 case 0xBA:
8431 if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7
8432 && !epartIsReg(opc[2]))
8433 return True;
8434 break;
8435 case 0xB0: case 0xB1:
8436 if (!epartIsReg(opc[2]))
8437 return True;
8438 break;
8439 case 0xC7:
8440 if (gregOfRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
8441 return True;
8442 break;
8443 case 0xC0: case 0xC1:
8444 if (!epartIsReg(opc[2]))
8445 return True;
8446 break;
8447 default:
8448 break;
8449 } /* switch (opc[1]) */
8450 break;
8451 }
8452
8453 default:
8454 break;
8455 } /* switch (opc[0]) */
8456
8457 return False;
8458 }
8459
/* Returns a new temp of type ty holding t1 with its byte order
   reversed.  Only Ity_I32 and Ity_I16 are supported. */
static IRTemp math_BSWAP ( IRTemp t1, IRType ty )
{
   IRTemp res = newTemp(ty);
   switch (ty) {
      case Ity_I32: {
         /* Move each byte to its mirror-image position and OR the
            four results together. */
         IRExpr* b3_to_0 = binop(Iop_Shl32, mkexpr(t1), mkU8(24));
         IRExpr* b2_to_1 = binop(Iop_And32,
                                 binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
                                 mkU32(0x00FF0000));
         IRExpr* b1_to_2 = binop(Iop_And32,
                                 binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
                                 mkU32(0x0000FF00));
         IRExpr* b0_to_3 = binop(Iop_And32,
                                 binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
                                 mkU32(0x000000FF));
         assign( res,
                 binop(Iop_Or32, b3_to_0,
                       binop(Iop_Or32, b2_to_1,
                             binop(Iop_Or32, b1_to_2, b0_to_3))) );
         return res;
      }
      case Ity_I16:
         /* Just swap the two halves. */
         assign( res,
                 binop(Iop_Or16,
                       binop(Iop_Shl16, mkexpr(t1), mkU8(8)),
                       binop(Iop_Shr16, mkexpr(t1), mkU8(8)) ));
         return res;
      default:
         break;
   }
   vassert(0);
   /*NOTREACHED*/
   return IRTemp_INVALID;
}
8492
8493 /*------------------------------------------------------------*/
8494 /*--- Disassemble a single instruction ---*/
8495 /*------------------------------------------------------------*/
8496
8497 /* Disassemble a single instruction into IR. The instruction is
8498 located in host memory at &guest_code[delta]. *expect_CAS is set
8499 to True if the resulting IR is expected to contain an IRCAS
8500 statement, and False if it's not expected to. This makes it
8501 possible for the caller of disInstr_X86_WRK to check that
8502 LOCK-prefixed instructions are at least plausibly translated, in
8503 that it becomes possible to check that a (validly) LOCK-prefixed
8504 instruction generates a translation containing an IRCAS, and
8505 instructions without LOCK prefixes don't generate translations
8506 containing an IRCAS.
8507 */
8508 static
disInstr_X86_WRK(Bool * expect_CAS,Bool (* resteerOkFn)(void *,Addr),Bool resteerCisOk,void * callback_opaque,Long delta64,const VexArchInfo * archinfo,const VexAbiInfo * vbi,Bool sigill_diag)8509 DisResult disInstr_X86_WRK (
8510 /*OUT*/Bool* expect_CAS,
8511 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
8512 Bool resteerCisOk,
8513 void* callback_opaque,
8514 Long delta64,
8515 const VexArchInfo* archinfo,
8516 const VexAbiInfo* vbi,
8517 Bool sigill_diag
8518 )
8519 {
8520 IRType ty;
8521 IRTemp addr, t0, t1, t2, t3, t4, t5, t6;
8522 Int alen;
8523 UChar opc, modrm, abyte, pre;
8524 UInt d32;
8525 HChar dis_buf[50];
8526 Int am_sz, d_sz, n_prefixes;
8527 DisResult dres;
8528 const UChar* insn; /* used in SSE decoders */
8529 Bool has_66_pfx = 0;
8530
8531 /* The running delta */
8532 Int delta = (Int)delta64;
8533
8534 /* Holds eip at the start of the insn, so that we can print
8535 consistent error messages for unimplemented insns. */
8536 Int delta_start = delta;
8537
8538 /* we keep using sz in order to avoid changing a lot of code without
8539 * any gain. So sz is equal to the current_sz_data.
8540 */
8541 Int sz;
8542 if (archinfo->x86_cr0 & 1) {
8543 sz = 4;
8544 current_sz_addr = 4;
8545 current_sz_data = 4;
8546 protected_mode = True;
8547 } else {
8548 sz = 2;
8549 current_sz_addr = 2;
8550 current_sz_data = 2;
8551 protected_mode = False;
8552 }
8553
8554 /* sorb holds the segment-override-prefix byte, if any. Zero if no
8555 prefix has been seen, else one of {0x26, 0x36, 0x3E, 0x64, 0x65}
8556 indicating the prefix. */
8557 UChar sorb = 0;
8558
8559 /* Gets set to True if a LOCK prefix is seen. */
8560 Bool pfx_lock = False;
8561
8562 /* Set result defaults. */
8563 dres.whatNext = Dis_Continue;
8564 dres.len = 0;
8565 dres.continueAt = 0;
8566 dres.hint = Dis_HintNone;
8567 dres.jk_StopHere = Ijk_INVALID;
8568
8569 *expect_CAS = False;
8570
8571 addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID;
8572
8573 vassert(guest_EIP_bbstart + delta == guest_EIP_curr_instr);
8574 DIP("\t0x%x: ", guest_EIP_bbstart+delta);
8575
8576 /* Spot "Special" instructions (see comment at top of file). */
8577 {
8578 const UChar* code = guest_code + delta;
8579 /* Spot the 12-byte preamble:
8580 C1C703 roll $3, %edi
8581 C1C70D roll $13, %edi
8582 C1C71D roll $29, %edi
8583 C1C713 roll $19, %edi
8584 */
8585 if (code[ 0] == 0xC1 && code[ 1] == 0xC7 && code[ 2] == 0x03 &&
8586 code[ 3] == 0xC1 && code[ 4] == 0xC7 && code[ 5] == 0x0D &&
8587 code[ 6] == 0xC1 && code[ 7] == 0xC7 && code[ 8] == 0x1D &&
8588 code[ 9] == 0xC1 && code[10] == 0xC7 && code[11] == 0x13) {
8589 /* Got a "Special" instruction preamble. Which one is it? */
8590 if (code[12] == 0x87 && code[13] == 0xDB /* xchgl %ebx,%ebx */) {
8591 /* %EDX = client_request ( %EAX ) */
8592 DIP("%%edx = client_request ( %%eax )\n");
8593 delta += 14;
8594 jmp_lit(&dres, Ijk_ClientReq, guest_EIP_bbstart+delta);
8595 vassert(dres.whatNext == Dis_StopHere);
8596 goto decode_success;
8597 }
8598 else
8599 if (code[12] == 0x87 && code[13] == 0xC9 /* xchgl %ecx,%ecx */) {
8600 /* %EAX = guest_NRADDR */
8601 DIP("%%eax = guest_NRADDR\n");
8602 delta += 14;
8603 putIReg(4, R_EAX, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
8604 goto decode_success;
8605 }
8606 else
8607 if (code[12] == 0x87 && code[13] == 0xD2 /* xchgl %edx,%edx */) {
8608 /* call-noredir *%EAX */
8609 DIP("call-noredir *%%eax\n");
8610 delta += 14;
8611 t1 = newTemp(Ity_I32);
8612 assign(t1, getIReg(4,R_EAX));
8613 t2 = newTemp(Ity_I32);
8614 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
8615 putIReg(4, R_ESP, mkexpr(t2));
8616 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta));
8617 jmp_treg(&dres, Ijk_NoRedir, t1);
8618 vassert(dres.whatNext == Dis_StopHere);
8619 goto decode_success;
8620 }
8621 else
8622 if (code[12] == 0x87 && code[13] == 0xFF /* xchgl %edi,%edi */) {
8623 /* IR injection */
8624 DIP("IR injection\n");
8625 vex_inject_ir(irsb, Iend_LE);
8626
8627 // Invalidate the current insn. The reason is that the IRop we're
8628 // injecting here can change. In which case the translation has to
8629 // be redone. For ease of handling, we simply invalidate all the
8630 // time.
8631 stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_EIP_curr_instr)));
8632 stmt(IRStmt_Put(OFFB_CMLEN, mkU32(14)));
8633
8634 delta += 14;
8635
8636 stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_bbstart + delta) ) );
8637 dres.whatNext = Dis_StopHere;
8638 dres.jk_StopHere = Ijk_InvalICache;
8639 goto decode_success;
8640 }
8641 /* We don't know what it is. */
8642 goto decode_failure;
8643 /*NOTREACHED*/
8644 }
8645 }
8646
8647 /* Handle a couple of weird-ass NOPs that have been observed in the
8648 wild. */
8649 {
8650 const UChar* code = guest_code + delta;
8651 /* Sun's JVM 1.5.0 uses the following as a NOP:
8652 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */
8653 if (code[0] == 0x26 && code[1] == 0x2E && code[2] == 0x64
8654 && code[3] == 0x65 && code[4] == 0x90) {
8655 DIP("%%es:%%cs:%%fs:%%gs:nop\n");
8656 delta += 5;
8657 goto decode_success;
8658 }
8659 /* Don't barf on recent binutils padding,
8660 all variants of which are: nopw %cs:0x0(%eax,%eax,1)
8661 66 2e 0f 1f 84 00 00 00 00 00
8662 66 66 2e 0f 1f 84 00 00 00 00 00
8663 66 66 66 2e 0f 1f 84 00 00 00 00 00
8664 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8665 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8666 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8667 */
8668 if (code[0] == 0x66) {
8669 Int data16_cnt;
8670 for (data16_cnt = 1; data16_cnt < 6; data16_cnt++)
8671 if (code[data16_cnt] != 0x66)
8672 break;
8673 if (code[data16_cnt] == 0x2E && code[data16_cnt + 1] == 0x0F
8674 && code[data16_cnt + 2] == 0x1F && code[data16_cnt + 3] == 0x84
8675 && code[data16_cnt + 4] == 0x00 && code[data16_cnt + 5] == 0x00
8676 && code[data16_cnt + 6] == 0x00 && code[data16_cnt + 7] == 0x00
8677 && code[data16_cnt + 8] == 0x00 ) {
8678 DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n");
8679 delta += 9 + data16_cnt;
8680 goto decode_success;
8681 }
8682 }
8683 }
8684
8685 /* Normal instruction handling starts here. */
8686
8687 /* Deal with some but not all prefixes:
8688 66(oso)
8689 F0(lock)
8690 2E(cs:) 3E(ds:) 26(es:) 64(fs:) 65(gs:) 36(ss:)
8691 Not dealt with (left in place):
8692 F2 F3
8693 */
8694 n_prefixes = 0;
8695 while (True) {
8696 if (n_prefixes > 7) goto decode_failure;
8697 pre = getUChar(delta);
8698 switch (pre) {
8699 case 0x66:
8700 has_66_pfx = 1;
8701 if (protected_mode) {
8702 sz = 2;
8703 current_sz_data = 2;
8704 } else {
8705 sz = 4;
8706 current_sz_data = 4;
8707 }
8708 break;
8709 case 0x67:
8710 if (protected_mode) {
8711 current_sz_addr = 2;
8712 } else {
8713 current_sz_addr = 4;
8714 }
8715 break;
8716 case 0xF0:
8717 pfx_lock = True;
8718 *expect_CAS = True;
8719 break;
8720 case 0x3E: /* %DS: */
8721 case 0x26: /* %ES: */
8722 case 0x64: /* %FS: */
8723 case 0x65: /* %GS: */
8724 case 0x36: /* %SS: */
8725 if (sorb != 0)
8726 goto decode_failure; /* only one seg override allowed */
8727 sorb = pre;
8728 break;
8729 case 0x2E: { /* %CS: */
8730 /* 2E prefix on a conditional branch instruction is a
8731 branch-prediction hint, which can safely be ignored. */
8732 UChar op1 = getIByte(delta+1);
8733 UChar op2 = getIByte(delta+2);
8734 if ((op1 >= 0x70 && op1 <= 0x7F)
8735 || (op1 == 0xE3)
8736 || (op1 == 0x0F && op2 >= 0x80 && op2 <= 0x8F)) {
8737 if (0) vex_printf("vex x86->IR: ignoring branch hint\n");
8738 } else {
8739 sorb = pre;
8740 }
8741 break;
8742 }
8743 default:
8744 goto not_a_prefix;
8745 }
8746 n_prefixes++;
8747 delta++;
8748 }
8749
8750 not_a_prefix:
8751
8752 /* Now we should be looking at the primary opcode byte or the
8753 leading F2 or F3. Check that any LOCK prefix is actually
8754 allowed. */
8755
8756 if (pfx_lock) {
8757 if (can_be_used_with_LOCK_prefix( &guest_code[delta] )) {
8758 DIP("lock ");
8759 } else {
8760 *expect_CAS = False;
8761 goto decode_failure;
8762 }
8763 }
8764
8765
8766 /* ---------------------------------------------------- */
8767 /* --- The SSE decoder. --- */
8768 /* ---------------------------------------------------- */
8769
8770 /* What did I do to deserve SSE ? Perhaps I was really bad in a
8771 previous life? */
8772
8773 /* Note, this doesn't handle SSE2 or SSE3. That is handled in a
8774 later section, further on. */
8775
8776 insn = &guest_code[delta];
8777
8778 /* Treat fxsave specially. It should be doable even on an SSE0
8779 (Pentium-II class) CPU. Hence be prepared to handle it on
8780 any subarchitecture variant.
8781 */
8782
8783 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
8784 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0xAE
8785 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 0) {
8786 IRDirty* d;
8787 modrm = getIByte(delta+2);
8788 vassert(!has_66_pfx);
8789 vassert(!epartIsReg(modrm));
8790
8791 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8792 delta += 2+alen;
8793 gen_SEGV_if_not_16_aligned(addr);
8794
8795 DIP("fxsave %s\n", dis_buf);
8796
8797 /* Uses dirty helper:
8798 void x86g_do_FXSAVE ( VexGuestX86State*, UInt ) */
8799 d = unsafeIRDirty_0_N (
8800 0/*regparms*/,
8801 "x86g_dirtyhelper_FXSAVE",
8802 &x86g_dirtyhelper_FXSAVE,
8803 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
8804 );
8805
8806 /* declare we're writing memory */
8807 d->mFx = Ifx_Write;
8808 d->mAddr = mkexpr(addr);
8809 d->mSize = 464; /* according to recent Intel docs */
8810
8811 /* declare we're reading guest state */
8812 d->nFxState = 7;
8813 vex_bzero(&d->fxState, sizeof(d->fxState));
8814
8815 d->fxState[0].fx = Ifx_Read;
8816 d->fxState[0].offset = OFFB_FTOP;
8817 d->fxState[0].size = sizeof(UInt);
8818
8819 d->fxState[1].fx = Ifx_Read;
8820 d->fxState[1].offset = OFFB_FPREGS;
8821 d->fxState[1].size = 8 * sizeof(ULong);
8822
8823 d->fxState[2].fx = Ifx_Read;
8824 d->fxState[2].offset = OFFB_FPTAGS;
8825 d->fxState[2].size = 8 * sizeof(UChar);
8826
8827 d->fxState[3].fx = Ifx_Read;
8828 d->fxState[3].offset = OFFB_FPROUND;
8829 d->fxState[3].size = sizeof(UInt);
8830
8831 d->fxState[4].fx = Ifx_Read;
8832 d->fxState[4].offset = OFFB_FC3210;
8833 d->fxState[4].size = sizeof(UInt);
8834
8835 d->fxState[5].fx = Ifx_Read;
8836 d->fxState[5].offset = OFFB_XMM0;
8837 d->fxState[5].size = 8 * sizeof(U128);
8838
8839 d->fxState[6].fx = Ifx_Read;
8840 d->fxState[6].offset = OFFB_SSEROUND;
8841 d->fxState[6].size = sizeof(UInt);
8842
8843 /* Be paranoid ... this assertion tries to ensure the 8 %xmm
8844 images are packed back-to-back. If not, the value of
8845 d->fxState[5].size is wrong. */
8846 vassert(16 == sizeof(U128));
8847 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16));
8848
8849 stmt( IRStmt_Dirty(d) );
8850
8851 goto decode_success;
8852 }
8853
8854 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
8855 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0xAE
8856 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 1) {
8857 IRDirty* d;
8858 modrm = getIByte(delta+2);
8859 vassert(!has_66_pfx);
8860 vassert(!epartIsReg(modrm));
8861
8862 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8863 delta += 2+alen;
8864 gen_SEGV_if_not_16_aligned(addr);
8865
8866 DIP("fxrstor %s\n", dis_buf);
8867
8868 /* Uses dirty helper:
8869 VexEmNote x86g_do_FXRSTOR ( VexGuestX86State*, UInt )
8870 NOTE:
8871 the VexEmNote value is simply ignored (unlike for FRSTOR)
8872 */
8873 d = unsafeIRDirty_0_N (
8874 0/*regparms*/,
8875 "x86g_dirtyhelper_FXRSTOR",
8876 &x86g_dirtyhelper_FXRSTOR,
8877 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
8878 );
8879
8880 /* declare we're reading memory */
8881 d->mFx = Ifx_Read;
8882 d->mAddr = mkexpr(addr);
8883 d->mSize = 464; /* according to recent Intel docs */
8884
8885 /* declare we're writing guest state */
8886 d->nFxState = 7;
8887 vex_bzero(&d->fxState, sizeof(d->fxState));
8888
8889 d->fxState[0].fx = Ifx_Write;
8890 d->fxState[0].offset = OFFB_FTOP;
8891 d->fxState[0].size = sizeof(UInt);
8892
8893 d->fxState[1].fx = Ifx_Write;
8894 d->fxState[1].offset = OFFB_FPREGS;
8895 d->fxState[1].size = 8 * sizeof(ULong);
8896
8897 d->fxState[2].fx = Ifx_Write;
8898 d->fxState[2].offset = OFFB_FPTAGS;
8899 d->fxState[2].size = 8 * sizeof(UChar);
8900
8901 d->fxState[3].fx = Ifx_Write;
8902 d->fxState[3].offset = OFFB_FPROUND;
8903 d->fxState[3].size = sizeof(UInt);
8904
8905 d->fxState[4].fx = Ifx_Write;
8906 d->fxState[4].offset = OFFB_FC3210;
8907 d->fxState[4].size = sizeof(UInt);
8908
8909 d->fxState[5].fx = Ifx_Write;
8910 d->fxState[5].offset = OFFB_XMM0;
8911 d->fxState[5].size = 8 * sizeof(U128);
8912
8913 d->fxState[6].fx = Ifx_Write;
8914 d->fxState[6].offset = OFFB_SSEROUND;
8915 d->fxState[6].size = sizeof(UInt);
8916
8917 /* Be paranoid ... this assertion tries to ensure the 8 %xmm
8918 images are packed back-to-back. If not, the value of
8919 d->fxState[5].size is wrong. */
8920 vassert(16 == sizeof(U128));
8921 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16));
8922
8923 stmt( IRStmt_Dirty(d) );
8924
8925 goto decode_success;
8926 }
8927
8928 /* ------ SSE decoder main ------ */
8929
8930 /* Skip parts of the decoder which don't apply given the stated
8931 guest subarchitecture. */
8932 if (archinfo->hwcaps == 0/*baseline, no sse at all*/)
8933 goto after_sse_decoders;
8934
8935 /* With mmxext only some extended MMX instructions are recognized.
8936 The mmxext instructions are MASKMOVQ MOVNTQ PAVGB PAVGW PMAXSW
8937 PMAXUB PMINSW PMINUB PMULHUW PSADBW PSHUFW PEXTRW PINSRW PMOVMSKB
8938 PREFETCHNTA PREFETCHT0 PREFETCHT1 PREFETCHT2 SFENCE
8939
8940 http://support.amd.com/us/Embedded_TechDocs/22466.pdf
8941 https://en.wikipedia.org/wiki/3DNow!#3DNow.21_extensions */
8942
8943 if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/)
8944 goto mmxext;
8945
8946 /* Otherwise we must be doing sse1 or sse2, so we can at least try
8947 for SSE1 here. */
8948
8949 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
8950 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0x58) {
8951 delta = dis_SSE_E_to_G_all( sorb, delta+2, "addps", Iop_Add32Fx4 );
8952 goto decode_success;
8953 }
8954
8955 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
8956 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x58) {
8957 vassert(!has_66_pfx);
8958 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "addss", Iop_Add32F0x4 );
8959 goto decode_success;
8960 }
8961
8962 /* 0F 55 = ANDNPS -- G = (not G) and E */
8963 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0x55) {
8964 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnps", Iop_AndV128 );
8965 goto decode_success;
8966 }
8967
8968 /* 0F 54 = ANDPS -- G = G and E */
8969 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0x54) {
8970 delta = dis_SSE_E_to_G_all( sorb, delta+2, "andps", Iop_AndV128 );
8971 goto decode_success;
8972 }
8973
8974 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
8975 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0xC2) {
8976 delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmpps", True, 4 );
8977 goto decode_success;
8978 }
8979
8980 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
8981 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xC2) {
8982 vassert(!has_66_pfx);
8983 delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpss", False, 4 );
8984 goto decode_success;
8985 }
8986
8987 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
8988 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
8989 if (!has_66_pfx && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
8990 IRTemp argL = newTemp(Ity_F32);
8991 IRTemp argR = newTemp(Ity_F32);
8992 modrm = getIByte(delta+2);
8993 if (epartIsReg(modrm)) {
8994 assign( argR, getXMMRegLane32F( eregOfRM(modrm), 0/*lowest lane*/ ) );
8995 delta += 2+1;
8996 DIP("[u]comiss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
8997 nameXMMReg(gregOfRM(modrm)) );
8998 } else {
8999 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9000 assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
9001 delta += 2+alen;
9002 DIP("[u]comiss %s,%s\n", dis_buf,
9003 nameXMMReg(gregOfRM(modrm)) );
9004 }
9005 assign( argL, getXMMRegLane32F( gregOfRM(modrm), 0/*lowest lane*/ ) );
9006
9007 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
9008 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
9009 stmt( IRStmt_Put(
9010 OFFB_CC_DEP1,
9011 binop( Iop_And32,
9012 binop(Iop_CmpF64,
9013 unop(Iop_F32toF64,mkexpr(argL)),
9014 unop(Iop_F32toF64,mkexpr(argR))),
9015 mkU32(0x45)
9016 )));
9017 /* Set NDEP even though it isn't used. This makes redundant-PUT
9018 elimination of previous stores to this field work better. */
9019 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
9020 goto decode_success;
9021 }
9022
9023 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
9024 half xmm */
9025 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0x2A) {
9026 IRTemp arg64 = newTemp(Ity_I64);
9027 IRTemp rmode = newTemp(Ity_I32);
9028 vassert(!has_66_pfx);
9029
9030 modrm = getIByte(delta+2);
9031 if (epartIsReg(modrm)) {
9032 /* Only switch to MMX mode if the source is a MMX register.
9033 See comments on CVTPI2PD for details. Fixes #357059. */
9034 do_MMX_preamble();
9035 assign( arg64, getMMXReg(eregOfRM(modrm)) );
9036 delta += 2+1;
9037 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregOfRM(modrm)),
9038 nameXMMReg(gregOfRM(modrm)));
9039 } else {
9040 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9041 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
9042 delta += 2+alen;
9043 DIP("cvtpi2ps %s,%s\n", dis_buf,
9044 nameXMMReg(gregOfRM(modrm)) );
9045 }
9046
9047 assign( rmode, get_sse_roundingmode() );
9048
9049 putXMMRegLane32F(
9050 gregOfRM(modrm), 0,
9051 binop(Iop_F64toF32,
9052 mkexpr(rmode),
9053 unop(Iop_I32StoF64,
9054 unop(Iop_64to32, mkexpr(arg64)) )) );
9055
9056 putXMMRegLane32F(
9057 gregOfRM(modrm), 1,
9058 binop(Iop_F64toF32,
9059 mkexpr(rmode),
9060 unop(Iop_I32StoF64,
9061 unop(Iop_64HIto32, mkexpr(arg64)) )) );
9062
9063 goto decode_success;
9064 }
9065
9066 /* F3 0F 2A = CVTSI2SS -- convert I32 in mem/ireg to F32 in low
9067 quarter xmm */
9068 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x2A) {
9069 IRTemp arg32 = newTemp(Ity_I32);
9070 IRTemp rmode = newTemp(Ity_I32);
9071 vassert(!has_66_pfx);
9072
9073 modrm = getIByte(delta+3);
9074 if (epartIsReg(modrm)) {
9075 assign( arg32, getIReg(4, eregOfRM(modrm)) );
9076 delta += 3+1;
9077 DIP("cvtsi2ss %s,%s\n", nameIReg(4, eregOfRM(modrm)),
9078 nameXMMReg(gregOfRM(modrm)));
9079 } else {
9080 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9081 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
9082 delta += 3+alen;
9083 DIP("cvtsi2ss %s,%s\n", dis_buf,
9084 nameXMMReg(gregOfRM(modrm)) );
9085 }
9086
9087 assign( rmode, get_sse_roundingmode() );
9088
9089 putXMMRegLane32F(
9090 gregOfRM(modrm), 0,
9091 binop(Iop_F64toF32,
9092 mkexpr(rmode),
9093 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
9094
9095 goto decode_success;
9096 }
9097
9098 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
9099 I32 in mmx, according to prevailing SSE rounding mode */
9100 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
9101 I32 in mmx, rounding towards zero */
9102 if (!has_66_pfx && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
9103 IRTemp dst64 = newTemp(Ity_I64);
9104 IRTemp rmode = newTemp(Ity_I32);
9105 IRTemp f32lo = newTemp(Ity_F32);
9106 IRTemp f32hi = newTemp(Ity_F32);
9107 Bool r2zero = toBool(insn[1] == 0x2C);
9108
9109 do_MMX_preamble();
9110 modrm = getIByte(delta+2);
9111
9112 if (epartIsReg(modrm)) {
9113 delta += 2+1;
9114 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
9115 assign(f32hi, getXMMRegLane32F(eregOfRM(modrm), 1));
9116 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
9117 nameXMMReg(eregOfRM(modrm)),
9118 nameMMXReg(gregOfRM(modrm)));
9119 } else {
9120 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9121 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
9122 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add32,
9123 mkexpr(addr),
9124 mkU32(4) )));
9125 delta += 2+alen;
9126 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
9127 dis_buf,
9128 nameMMXReg(gregOfRM(modrm)));
9129 }
9130
9131 if (r2zero) {
9132 assign(rmode, mkU32((UInt)Irrm_ZERO) );
9133 } else {
9134 assign( rmode, get_sse_roundingmode() );
9135 }
9136
9137 assign(
9138 dst64,
9139 binop( Iop_32HLto64,
9140 binop( Iop_F64toI32S,
9141 mkexpr(rmode),
9142 unop( Iop_F32toF64, mkexpr(f32hi) ) ),
9143 binop( Iop_F64toI32S,
9144 mkexpr(rmode),
9145 unop( Iop_F32toF64, mkexpr(f32lo) ) )
9146 )
9147 );
9148
9149 putMMXReg(gregOfRM(modrm), mkexpr(dst64));
9150 goto decode_success;
9151 }
9152
9153 /* F3 0F 2D = CVTSS2SI -- convert F32 in mem/low quarter xmm to
9154 I32 in ireg, according to prevailing SSE rounding mode */
9155 /* F3 0F 2C = CVTTSS2SI -- convert F32 in mem/low quarter xmm to
9156 I32 in ireg, rounding towards zero */
9157 if (insn[0] == 0xF3 && insn[1] == 0x0F
9158 && (insn[2] == 0x2D || insn[2] == 0x2C)) {
9159 IRTemp rmode = newTemp(Ity_I32);
9160 IRTemp f32lo = newTemp(Ity_F32);
9161 Bool r2zero = toBool(insn[2] == 0x2C);
9162 vassert(!has_66_pfx);
9163
9164 modrm = getIByte(delta+3);
9165 if (epartIsReg(modrm)) {
9166 delta += 3+1;
9167 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
9168 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
9169 nameXMMReg(eregOfRM(modrm)),
9170 nameIReg(4, gregOfRM(modrm)));
9171 } else {
9172 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9173 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
9174 delta += 3+alen;
9175 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
9176 dis_buf,
9177 nameIReg(4, gregOfRM(modrm)));
9178 }
9179
9180 if (r2zero) {
9181 assign( rmode, mkU32((UInt)Irrm_ZERO) );
9182 } else {
9183 assign( rmode, get_sse_roundingmode() );
9184 }
9185
9186 putIReg(4, gregOfRM(modrm),
9187 binop( Iop_F64toI32S,
9188 mkexpr(rmode),
9189 unop( Iop_F32toF64, mkexpr(f32lo) ) )
9190 );
9191
9192 goto decode_success;
9193 }
9194
9195 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
9196 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0x5E) {
9197 delta = dis_SSE_E_to_G_all( sorb, delta+2, "divps", Iop_Div32Fx4 );
9198 goto decode_success;
9199 }
9200
9201 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
9202 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5E) {
9203 vassert(!has_66_pfx);
9204 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "divss", Iop_Div32F0x4 );
9205 goto decode_success;
9206 }
9207
9208 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
9209 if (insn[0] == 0x0F && insn[1] == 0xAE
9210 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 2) {
9211
9212 IRTemp t64 = newTemp(Ity_I64);
9213 IRTemp ew = newTemp(Ity_I32);
9214
9215 modrm = getIByte(delta+2);
9216 vassert(!epartIsReg(modrm));
9217 vassert(!has_66_pfx);
9218
9219 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9220 delta += 2+alen;
9221 DIP("ldmxcsr %s\n", dis_buf);
9222
9223 /* The only thing we observe in %mxcsr is the rounding mode.
9224 Therefore, pass the 32-bit value (SSE native-format control
9225 word) to a clean helper, getting back a 64-bit value, the
9226 lower half of which is the SSEROUND value to store, and the
9227 upper half of which is the emulation-warning token which may
9228 be generated.
9229 */
9230 /* ULong x86h_check_ldmxcsr ( UInt ); */
9231 assign( t64, mkIRExprCCall(
9232 Ity_I64, 0/*regparms*/,
9233 "x86g_check_ldmxcsr",
9234 &x86g_check_ldmxcsr,
9235 mkIRExprVec_1( loadLE(Ity_I32, mkexpr(addr)) )
9236 )
9237 );
9238
9239 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
9240 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
9241 put_emwarn( mkexpr(ew) );
9242 /* Finally, if an emulation warning was reported, side-exit to
9243 the next insn, reporting the warning, so that Valgrind's
9244 dispatcher sees the warning. */
9245 stmt(
9246 IRStmt_Exit(
9247 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
9248 Ijk_EmWarn,
9249 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
9250 OFFB_EIP
9251 )
9252 );
9253 goto decode_success;
9254 }
9255
9256
9257 /* mmxext sse1 subset starts here. mmxext only arches will parse
9258 only this subset of the sse1 instructions. */
9259 mmxext:
9260
9261 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9262 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
9263 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0xF7) {
9264 Bool ok = False;
9265 delta = dis_MMX( &ok, sorb, sz, delta+1 );
9266 if (!ok)
9267 goto decode_failure;
9268 goto decode_success;
9269 }
9270
9271 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9272 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
9273 Intel manual does not say anything about the usual business of
9274 the FP reg tags getting trashed whenever an MMX insn happens.
9275 So we just leave them alone.
9276 */
9277 if (insn[0] == 0x0F && insn[1] == 0xE7) {
9278 modrm = getIByte(delta+2);
9279 if (!has_66_pfx && !epartIsReg(modrm)) {
9280 /* do_MMX_preamble(); Intel docs don't specify this */
9281 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9282 storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
9283 DIP("movntq %s,%s\n", dis_buf,
9284 nameMMXReg(gregOfRM(modrm)));
9285 delta += 2+alen;
9286 goto decode_success;
9287 }
9288 /* else fall through */
9289 }
9290
9291 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9292 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
9293 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0xE0) {
9294 do_MMX_preamble();
9295 delta = dis_MMXop_regmem_to_reg (
9296 sorb, delta+2, insn[1], "pavgb", False );
9297 goto decode_success;
9298 }
9299
9300 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9301 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
9302 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0xE3) {
9303 do_MMX_preamble();
9304 delta = dis_MMXop_regmem_to_reg (
9305 sorb, delta+2, insn[1], "pavgw", False );
9306 goto decode_success;
9307 }
9308
9309 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9310 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
9311 zero-extend of it in ireg(G). */
9312 if (insn[0] == 0x0F && insn[1] == 0xC5) {
9313 modrm = insn[2];
9314 if (!has_66_pfx && epartIsReg(modrm)) {
9315 IRTemp sV = newTemp(Ity_I64);
9316 t5 = newTemp(Ity_I16);
9317 do_MMX_preamble();
9318 assign(sV, getMMXReg(eregOfRM(modrm)));
9319 breakup64to16s( sV, &t3, &t2, &t1, &t0 );
9320 switch (insn[3] & 3) {
9321 case 0: assign(t5, mkexpr(t0)); break;
9322 case 1: assign(t5, mkexpr(t1)); break;
9323 case 2: assign(t5, mkexpr(t2)); break;
9324 case 3: assign(t5, mkexpr(t3)); break;
9325 default: vassert(0); /*NOTREACHED*/
9326 }
9327 putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t5)));
9328 DIP("pextrw $%d,%s,%s\n",
9329 (Int)insn[3], nameMMXReg(eregOfRM(modrm)),
9330 nameIReg(4,gregOfRM(modrm)));
9331 delta += 4;
9332 goto decode_success;
9333 }
9334 /* else fall through */
9335 }
9336
9337 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9338 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
9339 put it into the specified lane of mmx(G). */
9340 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0xC4) {
9341 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
9342 mmx reg. t4 is the new lane value. t5 is the original
9343 mmx value. t6 is the new mmx value. */
9344 Int lane;
9345 t4 = newTemp(Ity_I16);
9346 t5 = newTemp(Ity_I64);
9347 t6 = newTemp(Ity_I64);
9348 modrm = insn[2];
9349 do_MMX_preamble();
9350
9351 assign(t5, getMMXReg(gregOfRM(modrm)));
9352 breakup64to16s( t5, &t3, &t2, &t1, &t0 );
9353
9354 if (epartIsReg(modrm)) {
9355 assign(t4, getIReg(2, eregOfRM(modrm)));
9356 delta += 3+1;
9357 lane = insn[3+1-1];
9358 DIP("pinsrw $%d,%s,%s\n", lane,
9359 nameIReg(2,eregOfRM(modrm)),
9360 nameMMXReg(gregOfRM(modrm)));
9361 } else {
9362 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9363 delta += 3+alen;
9364 lane = insn[3+alen-1];
9365 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
9366 DIP("pinsrw $%d,%s,%s\n", lane,
9367 dis_buf,
9368 nameMMXReg(gregOfRM(modrm)));
9369 }
9370
9371 switch (lane & 3) {
9372 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
9373 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
9374 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
9375 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
9376 default: vassert(0); /*NOTREACHED*/
9377 }
9378 putMMXReg(gregOfRM(modrm), mkexpr(t6));
9379 goto decode_success;
9380 }
9381
9382 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9383 /* 0F EE = PMAXSW -- 16x4 signed max */
9384 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0xEE) {
9385 do_MMX_preamble();
9386 delta = dis_MMXop_regmem_to_reg (
9387 sorb, delta+2, insn[1], "pmaxsw", False );
9388 goto decode_success;
9389 }
9390
9391 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9392 /* 0F DE = PMAXUB -- 8x8 unsigned max */
9393 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0xDE) {
9394 do_MMX_preamble();
9395 delta = dis_MMXop_regmem_to_reg (
9396 sorb, delta+2, insn[1], "pmaxub", False );
9397 goto decode_success;
9398 }
9399
9400 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9401 /* 0F EA = PMINSW -- 16x4 signed min */
9402 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0xEA) {
9403 do_MMX_preamble();
9404 delta = dis_MMXop_regmem_to_reg (
9405 sorb, delta+2, insn[1], "pminsw", False );
9406 goto decode_success;
9407 }
9408
9409 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9410 /* 0F DA = PMINUB -- 8x8 unsigned min */
9411 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0xDA) {
9412 do_MMX_preamble();
9413 delta = dis_MMXop_regmem_to_reg (
9414 sorb, delta+2, insn[1], "pminub", False );
9415 goto decode_success;
9416 }
9417
9418 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9419 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
9420 mmx(E), turn them into a byte, and put zero-extend of it in
9421 ireg(G). */
9422 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0xD7) {
9423 modrm = insn[2];
9424 if (epartIsReg(modrm)) {
9425 do_MMX_preamble();
9426 t0 = newTemp(Ity_I64);
9427 t1 = newTemp(Ity_I32);
9428 assign(t0, getMMXReg(eregOfRM(modrm)));
9429 assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0))));
9430 putIReg(4, gregOfRM(modrm), mkexpr(t1));
9431 DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
9432 nameIReg(4,gregOfRM(modrm)));
9433 delta += 3;
9434 goto decode_success;
9435 }
9436 /* else fall through */
9437 }
9438
9439 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9440 /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */
9441 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0xE4) {
9442 do_MMX_preamble();
9443 delta = dis_MMXop_regmem_to_reg (
9444 sorb, delta+2, insn[1], "pmuluh", False );
9445 goto decode_success;
9446 }
9447
9448 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
9449 /* 0F 18 /1 = PREFETCH0 -- with various different hints */
9450 /* 0F 18 /2 = PREFETCH1 */
9451 /* 0F 18 /3 = PREFETCH2 */
9452 if (insn[0] == 0x0F && insn[1] == 0x18
9453 && !epartIsReg(insn[2])
9454 && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 3) {
9455 const HChar* hintstr = "??";
9456
9457 modrm = getIByte(delta+2);
9458 vassert(!epartIsReg(modrm));
9459
9460 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9461 delta += 2+alen;
9462
9463 switch (gregOfRM(modrm)) {
9464 case 0: hintstr = "nta"; break;
9465 case 1: hintstr = "t0"; break;
9466 case 2: hintstr = "t1"; break;
9467 case 3: hintstr = "t2"; break;
9468 default: vassert(0); /*NOTREACHED*/
9469 }
9470
9471 DIP("prefetch%s %s\n", hintstr, dis_buf);
9472 goto decode_success;
9473 }
9474
9475 /* 0F 0D /0 = PREFETCH m8 -- 3DNow! prefetch */
9476 /* 0F 0D /1 = PREFETCHW m8 -- ditto, with some other hint */
9477 if (insn[0] == 0x0F && insn[1] == 0x0D
9478 && !epartIsReg(insn[2])
9479 && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 1) {
9480 const HChar* hintstr = "??";
9481
9482 modrm = getIByte(delta+2);
9483 vassert(!epartIsReg(modrm));
9484
9485 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9486 delta += 2+alen;
9487
9488 switch (gregOfRM(modrm)) {
9489 case 0: hintstr = ""; break;
9490 case 1: hintstr = "w"; break;
9491 default: vassert(0); /*NOTREACHED*/
9492 }
9493
9494 DIP("prefetch%s %s\n", hintstr, dis_buf);
9495 goto decode_success;
9496 }
9497
9498 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9499 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
9500 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0xF6) {
9501 do_MMX_preamble();
9502 delta = dis_MMXop_regmem_to_reg (
9503 sorb, delta+2, insn[1], "psadbw", False );
9504 goto decode_success;
9505 }
9506
9507 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9508 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
9509 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0x70) {
9510 Int order;
9511 IRTemp sV, dV, s3, s2, s1, s0;
9512 s3 = s2 = s1 = s0 = IRTemp_INVALID;
9513 sV = newTemp(Ity_I64);
9514 dV = newTemp(Ity_I64);
9515 do_MMX_preamble();
9516 modrm = insn[2];
9517 if (epartIsReg(modrm)) {
9518 assign( sV, getMMXReg(eregOfRM(modrm)) );
9519 order = (Int)insn[3];
9520 delta += 2+2;
9521 DIP("pshufw $%d,%s,%s\n", order,
9522 nameMMXReg(eregOfRM(modrm)),
9523 nameMMXReg(gregOfRM(modrm)));
9524 } else {
9525 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9526 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
9527 order = (Int)insn[2+alen];
9528 delta += 3+alen;
9529 DIP("pshufw $%d,%s,%s\n", order,
9530 dis_buf,
9531 nameMMXReg(gregOfRM(modrm)));
9532 }
9533 breakup64to16s( sV, &s3, &s2, &s1, &s0 );
9534
9535 # define SEL(n) \
9536 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
9537 assign(dV,
9538 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
9539 SEL((order>>2)&3), SEL((order>>0)&3) )
9540 );
9541 putMMXReg(gregOfRM(modrm), mkexpr(dV));
9542 # undef SEL
9543 goto decode_success;
9544 }
9545
9546 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
9547 if (insn[0] == 0x0F && insn[1] == 0xAE
9548 && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {
9549 vassert(!has_66_pfx);
9550 delta += 3;
9551 /* Insert a memory fence. It's sometimes important that these
9552 are carried through to the generated code. */
9553 stmt( IRStmt_MBE(Imbe_Fence) );
9554 DIP("sfence\n");
9555 goto decode_success;
9556 }
9557
9558 /* End of mmxext sse1 subset. No more sse parsing for mmxext only arches. */
9559 if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/)
9560 goto after_sse_decoders;
9561
9562
9563 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
9564 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0x5F) {
9565 delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 );
9566 goto decode_success;
9567 }
9568
9569 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
9570 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) {
9571 vassert(!has_66_pfx);
9572 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 );
9573 goto decode_success;
9574 }
9575
9576 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
9577 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0x5D) {
9578 delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 );
9579 goto decode_success;
9580 }
9581
9582 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
9583 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) {
9584 vassert(!has_66_pfx);
9585 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 );
9586 goto decode_success;
9587 }
9588
9589 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
9590 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
9591 if (!has_66_pfx && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) {
9592 modrm = getIByte(delta+2);
9593 if (epartIsReg(modrm)) {
9594 putXMMReg( gregOfRM(modrm),
9595 getXMMReg( eregOfRM(modrm) ));
9596 DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9597 nameXMMReg(gregOfRM(modrm)));
9598 delta += 2+1;
9599 } else {
9600 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9601 if (insn[1] == 0x28/*movaps*/)
9602 gen_SEGV_if_not_16_aligned( addr );
9603 putXMMReg( gregOfRM(modrm),
9604 loadLE(Ity_V128, mkexpr(addr)) );
9605 DIP("mov[ua]ps %s,%s\n", dis_buf,
9606 nameXMMReg(gregOfRM(modrm)));
9607 delta += 2+alen;
9608 }
9609 goto decode_success;
9610 }
9611
9612 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
9613 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
9614 if (!has_66_pfx && insn[0] == 0x0F
9615 && (insn[1] == 0x29 || insn[1] == 0x11)) {
9616 modrm = getIByte(delta+2);
9617 if (epartIsReg(modrm)) {
9618 /* fall through; awaiting test case */
9619 } else {
9620 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9621 if (insn[1] == 0x29/*movaps*/)
9622 gen_SEGV_if_not_16_aligned( addr );
9623 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
9624 DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)),
9625 dis_buf );
9626 delta += 2+alen;
9627 goto decode_success;
9628 }
9629 }
9630
9631 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
9632 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
9633 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0x16) {
9634 modrm = getIByte(delta+2);
9635 if (epartIsReg(modrm)) {
9636 delta += 2+1;
9637 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
9638 getXMMRegLane64( eregOfRM(modrm), 0 ) );
9639 DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9640 nameXMMReg(gregOfRM(modrm)));
9641 } else {
9642 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9643 delta += 2+alen;
9644 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
9645 loadLE(Ity_I64, mkexpr(addr)) );
9646 DIP("movhps %s,%s\n", dis_buf,
9647 nameXMMReg( gregOfRM(modrm) ));
9648 }
9649 goto decode_success;
9650 }
9651
9652 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
9653 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0x17) {
9654 if (!epartIsReg(insn[2])) {
9655 delta += 2;
9656 addr = disAMode ( &alen, sorb, delta, dis_buf );
9657 delta += alen;
9658 storeLE( mkexpr(addr),
9659 getXMMRegLane64( gregOfRM(insn[2]),
9660 1/*upper lane*/ ) );
9661 DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
9662 dis_buf);
9663 goto decode_success;
9664 }
9665 /* else fall through */
9666 }
9667
9668 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
   /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
9670 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0x12) {
9671 modrm = getIByte(delta+2);
9672 if (epartIsReg(modrm)) {
9673 delta += 2+1;
9674 putXMMRegLane64( gregOfRM(modrm),
9675 0/*lower lane*/,
9676 getXMMRegLane64( eregOfRM(modrm), 1 ));
9677 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)),
9678 nameXMMReg(gregOfRM(modrm)));
9679 } else {
9680 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9681 delta += 2+alen;
9682 putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
9683 loadLE(Ity_I64, mkexpr(addr)) );
9684 DIP("movlps %s, %s\n",
9685 dis_buf, nameXMMReg( gregOfRM(modrm) ));
9686 }
9687 goto decode_success;
9688 }
9689
9690 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
9691 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0x13) {
9692 if (!epartIsReg(insn[2])) {
9693 delta += 2;
9694 addr = disAMode ( &alen, sorb, delta, dis_buf );
9695 delta += alen;
9696 storeLE( mkexpr(addr),
9697 getXMMRegLane64( gregOfRM(insn[2]),
9698 0/*lower lane*/ ) );
9699 DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
9700 dis_buf);
9701 goto decode_success;
9702 }
9703 /* else fall through */
9704 }
9705
9706 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
9707 to 4 lowest bits of ireg(G) */
9708 if (insn[0] == 0x0F && insn[1] == 0x50) {
9709 modrm = getIByte(delta+2);
9710 if (!has_66_pfx && epartIsReg(modrm)) {
9711 Int src;
9712 t0 = newTemp(Ity_I32);
9713 t1 = newTemp(Ity_I32);
9714 t2 = newTemp(Ity_I32);
9715 t3 = newTemp(Ity_I32);
9716 delta += 2+1;
9717 src = eregOfRM(modrm);
9718 assign( t0, binop( Iop_And32,
9719 binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)),
9720 mkU32(1) ));
9721 assign( t1, binop( Iop_And32,
9722 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)),
9723 mkU32(2) ));
9724 assign( t2, binop( Iop_And32,
9725 binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)),
9726 mkU32(4) ));
9727 assign( t3, binop( Iop_And32,
9728 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)),
9729 mkU32(8) ));
9730 putIReg(4, gregOfRM(modrm),
9731 binop(Iop_Or32,
9732 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
9733 binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
9734 )
9735 );
9736 DIP("movmskps %s,%s\n", nameXMMReg(src),
9737 nameIReg(4, gregOfRM(modrm)));
9738 goto decode_success;
9739 }
9740 /* else fall through */
9741 }
9742
9743 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
9744 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
9745 if (insn[0] == 0x0F && insn[1] == 0x2B) {
9746 modrm = getIByte(delta+2);
9747 if (!epartIsReg(modrm)) {
9748 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9749 gen_SEGV_if_not_16_aligned( addr );
9750 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
9751 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
9752 dis_buf,
9753 nameXMMReg(gregOfRM(modrm)));
9754 delta += 2+alen;
9755 goto decode_success;
9756 }
9757 /* else fall through */
9758 }
9759
9760 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
9761 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
9762 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) {
9763 vassert(!has_66_pfx);
9764 modrm = getIByte(delta+3);
9765 if (epartIsReg(modrm)) {
9766 putXMMRegLane32( gregOfRM(modrm), 0,
9767 getXMMRegLane32( eregOfRM(modrm), 0 ));
9768 DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9769 nameXMMReg(gregOfRM(modrm)));
9770 delta += 3+1;
9771 } else {
9772 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9773 /* zero bits 127:64 */
9774 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
9775 /* zero bits 63:32 */
9776 putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) );
9777 /* write bits 31:0 */
9778 putXMMRegLane32( gregOfRM(modrm), 0,
9779 loadLE(Ity_I32, mkexpr(addr)) );
9780 DIP("movss %s,%s\n", dis_buf,
9781 nameXMMReg(gregOfRM(modrm)));
9782 delta += 3+alen;
9783 }
9784 goto decode_success;
9785 }
9786
9787 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
9788 or lo 1/4 xmm). */
9789 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) {
9790 vassert(!has_66_pfx);
9791 modrm = getIByte(delta+3);
9792 if (epartIsReg(modrm)) {
9793 /* fall through, we don't yet have a test case */
9794 } else {
9795 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9796 storeLE( mkexpr(addr),
9797 getXMMRegLane32(gregOfRM(modrm), 0) );
9798 DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)),
9799 dis_buf);
9800 delta += 3+alen;
9801 goto decode_success;
9802 }
9803 }
9804
9805 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
9806 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0x59) {
9807 delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 );
9808 goto decode_success;
9809 }
9810
9811 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
9812 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) {
9813 vassert(!has_66_pfx);
9814 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 );
9815 goto decode_success;
9816 }
9817
   /* 0F 56 = ORPS -- G = G or E */
9819 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0x56) {
9820 delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 );
9821 goto decode_success;
9822 }
9823
9824 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
9825 if (insn[0] == 0x0F && insn[1] == 0x53) {
9826 vassert(!has_66_pfx);
9827 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
9828 "rcpps", Iop_RecipEst32Fx4 );
9829 goto decode_success;
9830 }
9831
9832 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
9833 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x53) {
9834 vassert(!has_66_pfx);
9835 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
9836 "rcpss", Iop_RecipEst32F0x4 );
9837 goto decode_success;
9838 }
9839
9840 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
9841 if (insn[0] == 0x0F && insn[1] == 0x52) {
9842 vassert(!has_66_pfx);
9843 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
9844 "rsqrtps", Iop_RSqrtEst32Fx4 );
9845 goto decode_success;
9846 }
9847
9848 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
9849 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x52) {
9850 vassert(!has_66_pfx);
9851 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
9852 "rsqrtss", Iop_RSqrtEst32F0x4 );
9853 goto decode_success;
9854 }
9855
9856 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
9857 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0xC6) {
9858 Int select;
9859 IRTemp sV, dV;
9860 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
9861 sV = newTemp(Ity_V128);
9862 dV = newTemp(Ity_V128);
9863 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
9864 modrm = insn[2];
9865 assign( dV, getXMMReg(gregOfRM(modrm)) );
9866
9867 if (epartIsReg(modrm)) {
9868 assign( sV, getXMMReg(eregOfRM(modrm)) );
9869 select = (Int)insn[3];
9870 delta += 2+2;
9871 DIP("shufps $%d,%s,%s\n", select,
9872 nameXMMReg(eregOfRM(modrm)),
9873 nameXMMReg(gregOfRM(modrm)));
9874 } else {
9875 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9876 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
9877 select = (Int)insn[2+alen];
9878 delta += 3+alen;
9879 DIP("shufps $%d,%s,%s\n", select,
9880 dis_buf,
9881 nameXMMReg(gregOfRM(modrm)));
9882 }
9883
9884 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
9885 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
9886
9887 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
9888 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
9889
9890 putXMMReg(
9891 gregOfRM(modrm),
9892 mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3),
9893 SELD((select>>2)&3), SELD((select>>0)&3) )
9894 );
9895
9896 # undef SELD
9897 # undef SELS
9898
9899 goto decode_success;
9900 }
9901
9902 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
9903 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0x51) {
9904 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
9905 "sqrtps", Iop_Sqrt32Fx4 );
9906 goto decode_success;
9907 }
9908
9909 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
9910 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x51) {
9911 vassert(!has_66_pfx);
9912 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
9913 "sqrtss", Iop_Sqrt32F0x4 );
9914 goto decode_success;
9915 }
9916
9917 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
9918 if (insn[0] == 0x0F && insn[1] == 0xAE
9919 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 3) {
9920 modrm = getIByte(delta+2);
9921 vassert(!has_66_pfx);
9922 vassert(!epartIsReg(modrm));
9923
9924 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9925 delta += 2+alen;
9926
9927 /* Fake up a native SSE mxcsr word. The only thing it depends
9928 on is SSEROUND[1:0], so call a clean helper to cook it up.
9929 */
9930 /* UInt x86h_create_mxcsr ( UInt sseround ) */
9931 DIP("stmxcsr %s\n", dis_buf);
9932 storeLE( mkexpr(addr),
9933 mkIRExprCCall(
9934 Ity_I32, 0/*regp*/,
9935 "x86g_create_mxcsr", &x86g_create_mxcsr,
9936 mkIRExprVec_1( get_sse_roundingmode() )
9937 )
9938 );
9939 goto decode_success;
9940 }
9941
9942 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
9943 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0x5C) {
9944 delta = dis_SSE_E_to_G_all( sorb, delta+2, "subps", Iop_Sub32Fx4 );
9945 goto decode_success;
9946 }
9947
9948 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
9949 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5C) {
9950 vassert(!has_66_pfx);
9951 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "subss", Iop_Sub32F0x4 );
9952 goto decode_success;
9953 }
9954
9955 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
9956 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
9957 /* These just appear to be special cases of SHUFPS */
9958 if (!has_66_pfx && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
9959 IRTemp sV, dV;
9960 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
9961 Bool hi = toBool(insn[1] == 0x15);
9962 sV = newTemp(Ity_V128);
9963 dV = newTemp(Ity_V128);
9964 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
9965 modrm = insn[2];
9966 assign( dV, getXMMReg(gregOfRM(modrm)) );
9967
9968 if (epartIsReg(modrm)) {
9969 assign( sV, getXMMReg(eregOfRM(modrm)) );
9970 delta += 2+1;
9971 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
9972 nameXMMReg(eregOfRM(modrm)),
9973 nameXMMReg(gregOfRM(modrm)));
9974 } else {
9975 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9976 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
9977 delta += 2+alen;
9978 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
9979 dis_buf,
9980 nameXMMReg(gregOfRM(modrm)));
9981 }
9982
9983 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
9984 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
9985
9986 if (hi) {
9987 putXMMReg( gregOfRM(modrm), mk128from32s( s3, d3, s2, d2 ) );
9988 } else {
9989 putXMMReg( gregOfRM(modrm), mk128from32s( s1, d1, s0, d0 ) );
9990 }
9991
9992 goto decode_success;
9993 }
9994
   /* 0F 57 = XORPS -- G = G xor E */
9996 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0x57) {
9997 delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorps", Iop_XorV128 );
9998 goto decode_success;
9999 }
10000
10001 /* ---------------------------------------------------- */
10002 /* --- end of the SSE decoder. --- */
10003 /* ---------------------------------------------------- */
10004
10005 /* ---------------------------------------------------- */
10006 /* --- start of the SSE2 decoder. --- */
10007 /* ---------------------------------------------------- */
10008
10009 /* Skip parts of the decoder which don't apply given the stated
10010 guest subarchitecture. */
10011 if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2))
10012 goto after_sse_decoders; /* no SSE2 capabilities */
10013
10014 insn = &guest_code[delta];
10015
   /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
10017 if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x58) {
10018 delta = dis_SSE_E_to_G_all( sorb, delta+2, "addpd", Iop_Add64Fx2 );
10019 goto decode_success;
10020 }
10021
10022 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
10023 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x58) {
10024 vassert(!has_66_pfx);
10025 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "addsd", Iop_Add64F0x2 );
10026 goto decode_success;
10027 }
10028
10029 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
10030 if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x55) {
10031 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnpd", Iop_AndV128 );
10032 goto decode_success;
10033 }
10034
10035 /* 66 0F 54 = ANDPD -- G = G and E */
10036 if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x54) {
10037 delta = dis_SSE_E_to_G_all( sorb, delta+2, "andpd", Iop_AndV128 );
10038 goto decode_success;
10039 }
10040
10041 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
10042 if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xC2) {
10043 delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmppd", True, 8 );
10044 goto decode_success;
10045 }
10046
10047 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
10048 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xC2) {
10049 vassert(!has_66_pfx);
10050 delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpsd", False, 8 );
10051 goto decode_success;
10052 }
10053
10054 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
10055 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
10056 if (has_66_pfx && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
10057 IRTemp argL = newTemp(Ity_F64);
10058 IRTemp argR = newTemp(Ity_F64);
10059 modrm = getIByte(delta+2);
10060 if (epartIsReg(modrm)) {
10061 assign( argR, getXMMRegLane64F( eregOfRM(modrm), 0/*lowest lane*/ ) );
10062 delta += 2+1;
10063 DIP("[u]comisd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10064 nameXMMReg(gregOfRM(modrm)) );
10065 } else {
10066 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10067 assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
10068 delta += 2+alen;
10069 DIP("[u]comisd %s,%s\n", dis_buf,
10070 nameXMMReg(gregOfRM(modrm)) );
10071 }
10072 assign( argL, getXMMRegLane64F( gregOfRM(modrm), 0/*lowest lane*/ ) );
10073
10074 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
10075 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
10076 stmt( IRStmt_Put(
10077 OFFB_CC_DEP1,
10078 binop( Iop_And32,
10079 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)),
10080 mkU32(0x45)
10081 )));
10082 /* Set NDEP even though it isn't used. This makes redundant-PUT
10083 elimination of previous stores to this field work better. */
10084 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
10085 goto decode_success;
10086 }
10087
10088 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
10089 F64 in xmm(G) */
10090 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xE6) {
10091 IRTemp arg64 = newTemp(Ity_I64);
10092 vassert(!has_66_pfx);
10093
10094 modrm = getIByte(delta+3);
10095 if (epartIsReg(modrm)) {
10096 assign( arg64, getXMMRegLane64(eregOfRM(modrm), 0) );
10097 delta += 3+1;
10098 DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10099 nameXMMReg(gregOfRM(modrm)));
10100 } else {
10101 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10102 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
10103 delta += 3+alen;
10104 DIP("cvtdq2pd %s,%s\n", dis_buf,
10105 nameXMMReg(gregOfRM(modrm)) );
10106 }
10107
10108 putXMMRegLane64F(
10109 gregOfRM(modrm), 0,
10110 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
10111 );
10112
10113 putXMMRegLane64F(
10114 gregOfRM(modrm), 1,
10115 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
10116 );
10117
10118 goto decode_success;
10119 }
10120
10121 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
10122 xmm(G) */
10123 if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0x5B) {
10124 IRTemp argV = newTemp(Ity_V128);
10125 IRTemp rmode = newTemp(Ity_I32);
10126
10127 modrm = getIByte(delta+2);
10128 if (epartIsReg(modrm)) {
10129 assign( argV, getXMMReg(eregOfRM(modrm)) );
10130 delta += 2+1;
10131 DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10132 nameXMMReg(gregOfRM(modrm)));
10133 } else {
10134 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10135 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10136 delta += 2+alen;
10137 DIP("cvtdq2ps %s,%s\n", dis_buf,
10138 nameXMMReg(gregOfRM(modrm)) );
10139 }
10140
10141 assign( rmode, get_sse_roundingmode() );
10142 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
10143
10144 # define CVT(_t) binop( Iop_F64toF32, \
10145 mkexpr(rmode), \
10146 unop(Iop_I32StoF64,mkexpr(_t)))
10147
10148 putXMMRegLane32F( gregOfRM(modrm), 3, CVT(t3) );
10149 putXMMRegLane32F( gregOfRM(modrm), 2, CVT(t2) );
10150 putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
10151 putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );
10152
10153 # undef CVT
10154
10155 goto decode_success;
10156 }
10157
10158 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
10159 lo half xmm(G), and zero upper half */
10160 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xE6) {
10161 IRTemp argV = newTemp(Ity_V128);
10162 IRTemp rmode = newTemp(Ity_I32);
10163 vassert(!has_66_pfx);
10164
10165 modrm = getIByte(delta+3);
10166 if (epartIsReg(modrm)) {
10167 assign( argV, getXMMReg(eregOfRM(modrm)) );
10168 delta += 3+1;
10169 DIP("cvtpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10170 nameXMMReg(gregOfRM(modrm)));
10171 } else {
10172 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10173 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10174 delta += 3+alen;
10175 DIP("cvtpd2dq %s,%s\n", dis_buf,
10176 nameXMMReg(gregOfRM(modrm)) );
10177 }
10178
10179 assign( rmode, get_sse_roundingmode() );
10180 t0 = newTemp(Ity_F64);
10181 t1 = newTemp(Ity_F64);
10182 assign( t0, unop(Iop_ReinterpI64asF64,
10183 unop(Iop_V128to64, mkexpr(argV))) );
10184 assign( t1, unop(Iop_ReinterpI64asF64,
10185 unop(Iop_V128HIto64, mkexpr(argV))) );
10186
10187 # define CVT(_t) binop( Iop_F64toI32S, \
10188 mkexpr(rmode), \
10189 mkexpr(_t) )
10190
10191 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
10192 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
10193 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
10194 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
10195
10196 # undef CVT
10197
10198 goto decode_success;
10199 }
10200
10201 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
10202 I32 in mmx, according to prevailing SSE rounding mode */
10203 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
10204 I32 in mmx, rounding towards zero */
10205 if (has_66_pfx && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
10206 IRTemp dst64 = newTemp(Ity_I64);
10207 IRTemp rmode = newTemp(Ity_I32);
10208 IRTemp f64lo = newTemp(Ity_F64);
10209 IRTemp f64hi = newTemp(Ity_F64);
10210 Bool r2zero = toBool(insn[1] == 0x2C);
10211
10212 do_MMX_preamble();
10213 modrm = getIByte(delta+2);
10214
10215 if (epartIsReg(modrm)) {
10216 delta += 2+1;
10217 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
10218 assign(f64hi, getXMMRegLane64F(eregOfRM(modrm), 1));
10219 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
10220 nameXMMReg(eregOfRM(modrm)),
10221 nameMMXReg(gregOfRM(modrm)));
10222 } else {
10223 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10224 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
10225 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add32,
10226 mkexpr(addr),
10227 mkU32(8) )));
10228 delta += 2+alen;
10229 DIP("cvt%spf2pi %s,%s\n", r2zero ? "t" : "",
10230 dis_buf,
10231 nameMMXReg(gregOfRM(modrm)));
10232 }
10233
10234 if (r2zero) {
10235 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10236 } else {
10237 assign( rmode, get_sse_roundingmode() );
10238 }
10239
10240 assign(
10241 dst64,
10242 binop( Iop_32HLto64,
10243 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
10244 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
10245 )
10246 );
10247
10248 putMMXReg(gregOfRM(modrm), mkexpr(dst64));
10249 goto decode_success;
10250 }
10251
10252 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
10253 lo half xmm(G), and zero upper half */
10254 /* Note, this is practically identical to CVTPD2DQ. It would have
10255 been nicer to merge them together, but the insn[] offsets differ
10256 by one. */
10257 if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x5A) {
10258 IRTemp argV = newTemp(Ity_V128);
10259 IRTemp rmode = newTemp(Ity_I32);
10260
10261 modrm = getIByte(delta+2);
10262 if (epartIsReg(modrm)) {
10263 assign( argV, getXMMReg(eregOfRM(modrm)) );
10264 delta += 2+1;
10265 DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10266 nameXMMReg(gregOfRM(modrm)));
10267 } else {
10268 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10269 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10270 delta += 2+alen;
10271 DIP("cvtpd2ps %s,%s\n", dis_buf,
10272 nameXMMReg(gregOfRM(modrm)) );
10273 }
10274
10275 assign( rmode, get_sse_roundingmode() );
10276 t0 = newTemp(Ity_F64);
10277 t1 = newTemp(Ity_F64);
10278 assign( t0, unop(Iop_ReinterpI64asF64,
10279 unop(Iop_V128to64, mkexpr(argV))) );
10280 assign( t1, unop(Iop_ReinterpI64asF64,
10281 unop(Iop_V128HIto64, mkexpr(argV))) );
10282
10283 # define CVT(_t) binop( Iop_F64toF32, \
10284 mkexpr(rmode), \
10285 mkexpr(_t) )
10286
10287 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
10288 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
10289 putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
10290 putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );
10291
10292 # undef CVT
10293
10294 goto decode_success;
10295 }
10296
10297 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
10298 xmm(G) */
10299 if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x2A) {
10300 IRTemp arg64 = newTemp(Ity_I64);
10301
10302 modrm = getIByte(delta+2);
10303 if (epartIsReg(modrm)) {
10304 /* Only switch to MMX mode if the source is a MMX register.
10305 This is inconsistent with all other instructions which
10306 convert between XMM and (M64 or MMX), which always switch
10307 to MMX mode even if 64-bit operand is M64 and not MMX. At
10308 least, that's what the Intel docs seem to me to say.
10309 Fixes #210264. */
10310 do_MMX_preamble();
10311 assign( arg64, getMMXReg(eregOfRM(modrm)) );
10312 delta += 2+1;
10313 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregOfRM(modrm)),
10314 nameXMMReg(gregOfRM(modrm)));
10315 } else {
10316 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10317 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
10318 delta += 2+alen;
10319 DIP("cvtpi2pd %s,%s\n", dis_buf,
10320 nameXMMReg(gregOfRM(modrm)) );
10321 }
10322
10323 putXMMRegLane64F(
10324 gregOfRM(modrm), 0,
10325 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
10326 );
10327
10328 putXMMRegLane64F(
10329 gregOfRM(modrm), 1,
10330 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
10331 );
10332
10333 goto decode_success;
10334 }
10335
10336 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
10337 xmm(G) */
10338 if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x5B) {
10339 IRTemp argV = newTemp(Ity_V128);
10340 IRTemp rmode = newTemp(Ity_I32);
10341
10342 modrm = getIByte(delta+2);
10343 if (epartIsReg(modrm)) {
10344 assign( argV, getXMMReg(eregOfRM(modrm)) );
10345 delta += 2+1;
10346 DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10347 nameXMMReg(gregOfRM(modrm)));
10348 } else {
10349 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10350 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10351 delta += 2+alen;
10352 DIP("cvtps2dq %s,%s\n", dis_buf,
10353 nameXMMReg(gregOfRM(modrm)) );
10354 }
10355
10356 assign( rmode, get_sse_roundingmode() );
10357 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
10358
10359 /* This is less than ideal. If it turns out to be a performance
10360 bottleneck it can be improved. */
10361 # define CVT(_t) \
10362 binop( Iop_F64toI32S, \
10363 mkexpr(rmode), \
10364 unop( Iop_F32toF64, \
10365 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
10366
10367 putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
10368 putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
10369 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
10370 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
10371
10372 # undef CVT
10373
10374 goto decode_success;
10375 }
10376
   /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
      F64 in xmm(G). */
   if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0x5A) {
      IRTemp f32lo = newTemp(Ity_F32);
      IRTemp f32hi = newTemp(Ity_F32);

      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         assign( f32lo, getXMMRegLane32F(eregOfRM(modrm), 0) );
         assign( f32hi, getXMMRegLane32F(eregOfRM(modrm), 1) );
         delta += 2+1;
         DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         /* Memory form reads only the low 8 bytes: two F32s. */
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
         assign( f32hi, loadLE(Ity_F32,
                               binop(Iop_Add32,mkexpr(addr),mkU32(4))) );
         delta += 2+alen;
         DIP("cvtps2pd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)) );
      }

      /* F32->F64 widening is exact, so no rounding mode is needed. */
      putXMMRegLane64F( gregOfRM(modrm), 1,
                        unop(Iop_F32toF64, mkexpr(f32hi)) );
      putXMMRegLane64F( gregOfRM(modrm), 0,
                        unop(Iop_F32toF64, mkexpr(f32lo)) );

      goto decode_success;
   }
10407
   /* F2 0F 2D = CVTSD2SI -- convert F64 in mem/low half xmm to
      I32 in ireg, according to prevailing SSE rounding mode */
   /* F2 0F 2C = CVTTSD2SI -- convert F64 in mem/low half xmm to
      I32 in ireg, rounding towards zero */
   if (insn[0] == 0xF2 && insn[1] == 0x0F
       && (insn[2] == 0x2D || insn[2] == 0x2C)) {
      IRTemp rmode = newTemp(Ity_I32);
      IRTemp f64lo = newTemp(Ity_F64);
      /* 2C is the truncating ("T") variant. */
      Bool r2zero = toBool(insn[2] == 0x2C);
      vassert(!has_66_pfx);

      modrm = getIByte(delta+3);   /* skip F2 0F 2C/2D */
      if (epartIsReg(modrm)) {
         delta += 3+1;
         assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
         DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
                                   nameXMMReg(eregOfRM(modrm)),
                                   nameIReg(4, gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
         delta += 3+alen;
         DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
                                   dis_buf,
                                   nameIReg(4, gregOfRM(modrm)));
      }

      /* Truncating variant forces round-to-zero; otherwise use the
         current SSE (MXCSR) rounding mode. */
      if (r2zero) {
         assign( rmode, mkU32((UInt)Irrm_ZERO) );
      } else {
         assign( rmode, get_sse_roundingmode() );
      }

      putIReg(4, gregOfRM(modrm),
                 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );

      goto decode_success;
   }
10446
   /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
      low 1/4 xmm(G), according to prevailing SSE rounding mode */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5A) {
      IRTemp rmode = newTemp(Ity_I32);
      IRTemp f64lo = newTemp(Ity_F64);
      vassert(!has_66_pfx);

      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         delta += 3+1;
         assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
         DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
         delta += 3+alen;
         DIP("cvtsd2ss %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
      }

      /* F64->F32 narrowing can lose precision, so the SSE rounding
         mode is observed.  Only the low F32 lane of G is written. */
      assign( rmode, get_sse_roundingmode() );
      putXMMRegLane32F(
         gregOfRM(modrm), 0,
         binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
      );

      goto decode_success;
   }
10476
   /* F2 0F 2A = CVTSI2SD -- convert I32 in mem/ireg to F64 in low
      half xmm */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x2A) {
      IRTemp arg32 = newTemp(Ity_I32);
      vassert(!has_66_pfx);

      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         assign( arg32, getIReg(4, eregOfRM(modrm)) );
         delta += 3+1;
         DIP("cvtsi2sd %s,%s\n", nameIReg(4, eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
         delta += 3+alen;
         DIP("cvtsi2sd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)) );
      }

      /* I32->F64 is exact (F64 can represent all I32 values), so no
         rounding mode is needed.  Upper half of G is unchanged. */
      putXMMRegLane64F(
         gregOfRM(modrm), 0,
         unop(Iop_I32StoF64, mkexpr(arg32)) );

      goto decode_success;
   }
10503
   /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
      low half xmm(G) */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5A) {
      IRTemp f32lo = newTemp(Ity_F32);
      vassert(!has_66_pfx);

      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         delta += 3+1;
         assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
         DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
         delta += 3+alen;
         DIP("cvtss2sd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
      }

      /* Widening F32->F64 is exact; upper half of G is unchanged. */
      putXMMRegLane64F( gregOfRM(modrm), 0,
                        unop( Iop_F32toF64, mkexpr(f32lo) ) );

      goto decode_success;
   }
10529
   /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
      lo half xmm(G), and zero upper half, rounding towards zero */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xE6) {
      IRTemp argV = newTemp(Ity_V128);
      IRTemp rmode = newTemp(Ity_I32);

      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         assign( argV, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("cvttpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("cvttpd2dq %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRM(modrm)) );
      }

      /* "TT" variant: always truncate (round towards zero). */
      assign( rmode, mkU32((UInt)Irrm_ZERO) );

      /* Reinterpret the two 64-bit halves of the source as F64s. */
      t0 = newTemp(Ity_F64);
      t1 = newTemp(Ity_F64);
      assign( t0, unop(Iop_ReinterpI64asF64,
                       unop(Iop_V128to64, mkexpr(argV))) );
      assign( t1, unop(Iop_ReinterpI64asF64,
                       unop(Iop_V128HIto64, mkexpr(argV))) );

#     define CVT(_t) binop( Iop_F64toI32S, \
                            mkexpr(rmode), \
                            mkexpr(_t) )

      /* Results go to the low two lanes; the high half is zeroed. */
      putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
      putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
      putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
      putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );

#     undef CVT

      goto decode_success;
   }
10572
   /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
      xmm(G), rounding towards zero */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5B) {
      IRTemp argV = newTemp(Ity_V128);
      IRTemp rmode = newTemp(Ity_I32);
      vassert(!has_66_pfx);

      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         assign( argV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("cvttps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("cvttps2dq %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRM(modrm)) );
      }

      /* "TT" variant: always truncate (round towards zero). */
      assign( rmode, mkU32((UInt)Irrm_ZERO) );
      breakup128to32s( argV, &t3, &t2, &t1, &t0 );

      /* This is less than ideal.  If it turns out to be a performance
         bottleneck it can be improved. */
      /* As with CVTPS2DQ: widen each lane to F64, then convert. */
#     define CVT(_t) \
        binop( Iop_F64toI32S, \
               mkexpr(rmode), \
               unop( Iop_F32toF64, \
                     unop( Iop_ReinterpI32asF32, mkexpr(_t))) )

      putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
      putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
      putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
      putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );

#     undef CVT

      goto decode_success;
   }
10614
   /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x5E) {
      /* Helper decodes the modrm/amode itself and returns the new delta. */
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "divpd", Iop_Div64Fx2 );
      goto decode_success;
   }

   /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5E) {
      vassert(!has_66_pfx);
      delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "divsd", Iop_Div64F0x2 );
      goto decode_success;
   }

   /* 0F AE /5 = LFENCE -- flush pending operations to memory */
   /* 0F AE /6 = MFENCE -- flush pending operations to memory */
   if (insn[0] == 0x0F && insn[1] == 0xAE
       && epartIsReg(insn[2])
       && (gregOfRM(insn[2]) == 5 || gregOfRM(insn[2]) == 6)) {
      vassert(!has_66_pfx);
      delta += 3;
      /* Insert a memory fence.  It's sometimes important that these
         are carried through to the generated code.  Both LFENCE and
         MFENCE map to the same (full) IR fence. */
      stmt( IRStmt_MBE(Imbe_Fence) );
      DIP("%sfence\n", gregOfRM(insn[2])==5 ? "l" : "m");
      goto decode_success;
   }

   /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x5F) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxpd", Iop_Max64Fx2 );
      goto decode_success;
   }

   /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5F) {
      vassert(!has_66_pfx);
      delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "maxsd", Iop_Max64F0x2 );
      goto decode_success;
   }

   /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x5D) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "minpd", Iop_Min64Fx2 );
      goto decode_success;
   }

   /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5D) {
      vassert(!has_66_pfx);
      delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "minsd", Iop_Min64F0x2 );
      goto decode_success;
   }
10667
   /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
   /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
   /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
   if (has_66_pfx && insn[0] == 0x0F
       && (insn[1] == 0x28 || insn[1] == 0x10 || insn[1] == 0x6F)) {
      const HChar* wot = insn[1]==0x28 ? "apd" :
                         insn[1]==0x10 ? "upd" : "dqa";
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         putXMMReg( gregOfRM(modrm),
                    getXMMReg( eregOfRM(modrm) ));
         DIP("mov%s %s,%s\n", wot, nameXMMReg(eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
         delta += 2+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         /* Only the "aligned" forms (MOVAPD/MOVDQA) fault on a
            misaligned address; MOVUPD does not. */
         if (insn[1] == 0x28/*movapd*/ || insn[1] == 0x6F/*movdqa*/)
            gen_SEGV_if_not_16_aligned( addr );
         putXMMReg( gregOfRM(modrm),
                    loadLE(Ity_V128, mkexpr(addr)) );
         DIP("mov%s %s,%s\n", wot, dis_buf,
                                   nameXMMReg(gregOfRM(modrm)));
         delta += 2+alen;
      }
      goto decode_success;
   }

   /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
   /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
   if (has_66_pfx && insn[0] == 0x0F
       && (insn[1] == 0x29 || insn[1] == 0x11)) {
      const HChar* wot = insn[1]==0x29 ? "apd" : "upd";
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         /* fall through; awaiting test case */
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         if (insn[1] == 0x29/*movapd*/)
            gen_SEGV_if_not_16_aligned( addr );
         storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
         DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRM(modrm)),
                                   dis_buf );
         delta += 2+alen;
         goto decode_success;
      }
   }
10714
   /* 66 0F 6E = MOVD from r/m32 to xmm, zeroing high 3/4 of xmm. */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x6E) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         delta += 2+1;
         /* Iop_32UtoV128 zero-extends the 32-bit value into a full
            128-bit vector, so the upper 96 bits of G become zero. */
         putXMMReg(
            gregOfRM(modrm),
            unop( Iop_32UtoV128, getIReg(4, eregOfRM(modrm)) )
         );
         DIP("movd %s, %s\n",
             nameIReg(4,eregOfRM(modrm)), nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         putXMMReg(
            gregOfRM(modrm),
            unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
         );
         DIP("movd %s, %s\n", dis_buf, nameXMMReg(gregOfRM(modrm)));
      }
      goto decode_success;
   }

   /* 66 0F 7E = MOVD from xmm low 1/4 to r/m32. */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x7E) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         delta += 2+1;
         putIReg( 4, eregOfRM(modrm),
                  getXMMRegLane32(gregOfRM(modrm), 0) );
         DIP("movd %s, %s\n",
             nameXMMReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm)));
      } else {
         addr = disAMode( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         storeLE( mkexpr(addr),
                  getXMMRegLane32(gregOfRM(modrm), 0) );
         DIP("movd %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
      }
      goto decode_success;
   }
10756
   /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x7F) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         delta += 2+1;
         putXMMReg( eregOfRM(modrm),
                    getXMMReg(gregOfRM(modrm)) );
         DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)),
                                nameXMMReg(eregOfRM(modrm)));
      } else {
         addr = disAMode( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         /* Aligned store: fault on a misaligned address. */
         gen_SEGV_if_not_16_aligned( addr );
         storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
         DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
      }
      goto decode_success;
   }

   /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
   /* Unfortunately can't simply use the MOVDQA case since the
      prefix lengths are different (66 vs F3) */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x6F) {
      vassert(!has_66_pfx);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         putXMMReg( gregOfRM(modrm),
                    getXMMReg( eregOfRM(modrm) ));
         DIP("movdqu %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                               nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         /* Unaligned load: no alignment check. */
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         putXMMReg( gregOfRM(modrm),
                    loadLE(Ity_V128, mkexpr(addr)) );
         DIP("movdqu %s,%s\n", dis_buf,
                               nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }
      goto decode_success;
   }

   /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
   /* Unfortunately can't simply use the MOVDQA case since the
      prefix lengths are different (66 vs F3) */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7F) {
      vassert(!has_66_pfx);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         delta += 3+1;
         putXMMReg( eregOfRM(modrm),
                    getXMMReg(gregOfRM(modrm)) );
         DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)),
                                nameXMMReg(eregOfRM(modrm)));
      } else {
         addr = disAMode( &alen, sorb, delta+3, dis_buf );
         delta += 3+alen;
         storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
         DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
      }
      goto decode_success;
   }
10819
   /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD6) {
      vassert(!has_66_pfx);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         /* Touching MMX state requires the x87/MMX mode switch. */
         do_MMX_preamble();
         putMMXReg( gregOfRM(modrm),
                    getXMMRegLane64( eregOfRM(modrm), 0 ));
         DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                nameMMXReg(gregOfRM(modrm)));
         delta += 3+1;
         goto decode_success;
      } else {
         /* fall through, apparently no mem case for this insn */
      }
   }
10836
   /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
   /* These seems identical to MOVHPS.  This instruction encoding is
      completely crazy. */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x16) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         /* fall through; apparently reg-reg is not possible */
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         /* Load 8 bytes into the upper lane; lower lane unchanged. */
         putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
                          loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movhpd %s,%s\n", dis_buf,
                               nameXMMReg( gregOfRM(modrm) ));
         goto decode_success;
      }
   }

   /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
   /* Again, this seems identical to MOVHPS. */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x17) {
      if (!epartIsReg(insn[2])) {
         delta += 2;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         storeLE( mkexpr(addr),
                  getXMMRegLane64( gregOfRM(insn[2]),
                                   1/*upper lane*/ ) );
         DIP("movhpd %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
                               dis_buf);
         goto decode_success;
      }
      /* else fall through */
   }

   /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
   /* Identical to MOVLPS ? */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x12) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         /* fall through; apparently reg-reg is not possible */
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         /* Load 8 bytes into the lower lane; upper lane unchanged. */
         putXMMRegLane64( gregOfRM(modrm),  0/*lower lane*/,
                          loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movlpd %s, %s\n",
             dis_buf, nameXMMReg( gregOfRM(modrm) ));
         goto decode_success;
      }
   }

   /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
   /* Identical to MOVLPS ? */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x13) {
      if (!epartIsReg(insn[2])) {
         delta += 2;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         storeLE( mkexpr(addr),
                  getXMMRegLane64( gregOfRM(insn[2]),
                                   0/*lower lane*/ ) );
         DIP("movlpd %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
                                dis_buf);
         goto decode_success;
      }
      /* else fall through */
   }
10905
   /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
      2 lowest bits of ireg(G) */
   if (insn[0] == 0x0F && insn[1] == 0x50) {
      modrm = getIByte(delta+2);
      if (has_66_pfx && epartIsReg(modrm)) {
         Int src;
         t0 = newTemp(Ity_I32);
         t1 = newTemp(Ity_I32);
         delta += 2+1;
         src = eregOfRM(modrm);
         /* t0 = sign bit of the low F64 (bit 31 of lane 1), moved to
            result bit 0. */
         assign( t0, binop( Iop_And32,
                            binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(31)),
                            mkU32(1) ));
         /* t1 = sign bit of the high F64 (bit 31 of lane 3); shifting
            by 30 (not 31) lands it directly in result bit 1. */
         assign( t1, binop( Iop_And32,
                            binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(30)),
                            mkU32(2) ));
         putIReg(4, gregOfRM(modrm),
                    binop(Iop_Or32, mkexpr(t0), mkexpr(t1))
                 );
         DIP("movmskpd %s,%s\n", nameXMMReg(src),
                                 nameIReg(4, gregOfRM(modrm)));
         goto decode_success;
      }
      /* else fall through */
   }
10931
   /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
   if (insn[0] == 0x0F && insn[1] == 0xF7) {
      modrm = getIByte(delta+2);
      if (has_66_pfx && epartIsReg(modrm)) {
         IRTemp regD = newTemp(Ity_V128);
         IRTemp mask = newTemp(Ity_V128);
         IRTemp olddata = newTemp(Ity_V128);
         IRTemp newdata = newTemp(Ity_V128);
         addr = newTemp(Ity_I32);

         /* Implicit destination is [EDI] (possibly segment-overridden). */
         assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) ));
         assign( regD, getXMMReg( gregOfRM(modrm) ));

         /* Unfortunately can't do the obvious thing with SarN8x16
            here since that can't be re-emitted as SSE2 code - no such
            insn. */
         /* SarN8x8 by 7 replicates each byte's top bit across the
            byte, turning the per-byte select bits into 0x00/0xFF
            masks; done per 64-bit half, then glued back together. */
         assign(
            mask,
            binop(Iop_64HLtoV128,
                  binop(Iop_SarN8x8,
                        getXMMRegLane64( eregOfRM(modrm), 1 ),
                        mkU8(7) ),
                  binop(Iop_SarN8x8,
                        getXMMRegLane64( eregOfRM(modrm), 0 ),
                        mkU8(7) ) ));
         /* Emulate the byte-masked store as a read-modify-write:
            new = (regD & mask) | (old & ~mask). */
         assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
         assign( newdata,
                 binop(Iop_OrV128,
                       binop(Iop_AndV128,
                             mkexpr(regD),
                             mkexpr(mask) ),
                       binop(Iop_AndV128,
                             mkexpr(olddata),
                             unop(Iop_NotV128, mkexpr(mask)))) );
         storeLE( mkexpr(addr), mkexpr(newdata) );

         delta += 2+1;
         DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRM(modrm) ),
                                   nameXMMReg( gregOfRM(modrm) ) );
         goto decode_success;
      }
      /* else fall through */
   }
10975
   /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
   /* The non-temporal hint is ignored; only the store semantics and
      the 16-byte alignment requirement are modelled. */
   if (insn[0] == 0x0F && insn[1] == 0xE7) {
      modrm = getIByte(delta+2);
      if (has_66_pfx && !epartIsReg(modrm)) {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
         DIP("movntdq %s,%s\n", dis_buf,
                                nameXMMReg(gregOfRM(modrm)));
         delta += 2+alen;
         goto decode_success;
      }
      /* else fall through */
   }

   /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
   if (insn[0] == 0x0F && insn[1] == 0xC3) {
      vassert(!has_66_pfx);
      modrm = getIByte(delta+2);
      if (!epartIsReg(modrm)) {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         storeLE( mkexpr(addr), getIReg(4, gregOfRM(modrm)) );
         DIP("movnti %s,%s\n", dis_buf,
                               nameIReg(4, gregOfRM(modrm)));
         delta += 2+alen;
         goto decode_success;
      }
      /* else fall through */
   }
11005
   /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
      or lo half xmm). */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xD6) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         /* fall through, awaiting test case */
         /* dst: lo half copied, hi half zeroed */
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         storeLE( mkexpr(addr),
                  getXMMRegLane64( gregOfRM(modrm), 0 ));
         DIP("movq %s,%s\n", nameXMMReg(gregOfRM(modrm)), dis_buf );
         delta += 2+alen;
         goto decode_success;
      }
   }

   /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
      hi half). */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xD6) {
      vassert(!has_66_pfx);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         /* Reading MMX state requires the x87/MMX mode switch. */
         do_MMX_preamble();
         /* Iop_64UtoV128 zero-extends, so the high half of G is zeroed. */
         putXMMReg( gregOfRM(modrm),
                    unop(Iop_64UtoV128, getMMXReg( eregOfRM(modrm) )) );
         DIP("movq2dq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
         goto decode_success;
      } else {
         /* fall through, apparently no mem case for this insn */
      }
   }
11040
11041 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
11042 G (lo half xmm). Upper half of G is zeroed out. */
11043 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
11044 G (lo half xmm). If E is mem, upper half of G is zeroed out.
11045 If E is reg, upper half of G is unchanged. */
11046 if ((insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x10)
11047 || (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7E)) {
11048 vassert(!has_66_pfx);
11049 modrm = getIByte(delta+3);
11050 if (epartIsReg(modrm)) {
11051 putXMMRegLane64( gregOfRM(modrm), 0,
11052 getXMMRegLane64( eregOfRM(modrm), 0 ));
11053 if (insn[0] == 0xF3/*MOVQ*/) {
11054 /* zero bits 127:64 */
11055 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
11056 }
11057 DIP("movsd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11058 nameXMMReg(gregOfRM(modrm)));
11059 delta += 3+1;
11060 } else {
11061 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11062 /* zero bits 127:64 */
11063 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
11064 /* write bits 63:0 */
11065 putXMMRegLane64( gregOfRM(modrm), 0,
11066 loadLE(Ity_I64, mkexpr(addr)) );
11067 DIP("movsd %s,%s\n", dis_buf,
11068 nameXMMReg(gregOfRM(modrm)));
11069 delta += 3+alen;
11070 }
11071 goto decode_success;
11072 }
11073
   /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
      or lo half xmm). */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x11) {
      vassert(!has_66_pfx);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         /* reg-reg: only the low lane of E is written; its upper
            half is unchanged. */
         putXMMRegLane64( eregOfRM(modrm), 0,
                          getXMMRegLane64( gregOfRM(modrm), 0 ));
         DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
                              nameXMMReg(eregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         storeLE( mkexpr(addr),
                  getXMMRegLane64(gregOfRM(modrm), 0) );
         DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
                              dis_buf);
         delta += 3+alen;
      }
      goto decode_success;
   }
11095
   /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x59) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulpd", Iop_Mul64Fx2 );
      goto decode_success;
   }

   /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x59) {
      vassert(!has_66_pfx);
      delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "mulsd", Iop_Mul64F0x2 );
      goto decode_success;
   }

   /* 66 0F 56 = ORPD -- G = G or E */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x56) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "orpd", Iop_OrV128 );
      goto decode_success;
   }
11114
   /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xC6) {
      Int select;
      IRTemp sV = newTemp(Ity_V128);
      IRTemp dV = newTemp(Ity_V128);
      IRTemp s1 = newTemp(Ity_I64);
      IRTemp s0 = newTemp(Ity_I64);
      IRTemp d1 = newTemp(Ity_I64);
      IRTemp d0 = newTemp(Ity_I64);

      modrm = insn[2];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         /* Immediate byte follows the modrm byte. */
         select = (Int)insn[3];
         delta += 2+2;   /* 2 opcode bytes + modrm + imm8 */
         DIP("shufpd $%d,%s,%s\n", select,
                                   nameXMMReg(eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         /* Immediate byte follows the address-mode bytes. */
         select = (Int)insn[2+alen];
         delta += 3+alen;
         DIP("shufpd $%d,%s,%s\n", select,
                                   dis_buf,
                                   nameXMMReg(gregOfRM(modrm)));
      }

      assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );

      /* Result lane 0 comes from dst (imm bit 0 selects d0/d1);
         result lane 1 comes from src (imm bit 1 selects s0/s1). */
#     define SELD(n) mkexpr((n)==0 ? d0 : d1)
#     define SELS(n) mkexpr((n)==0 ? s0 : s1)

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128, SELS((select>>1)&1), SELD((select>>0)&1) )
      );

#     undef SELD
#     undef SELS

      goto decode_success;
   }
11163
   /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x51) {
      delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
                                        "sqrtpd", Iop_Sqrt64Fx2 );
      goto decode_success;
   }

   /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x51) {
      vassert(!has_66_pfx);
      delta = dis_SSE_E_to_G_unary_lo64( sorb, delta+3,
                                         "sqrtsd", Iop_Sqrt64F0x2 );
      goto decode_success;
   }

   /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x5C) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "subpd", Iop_Sub64Fx2 );
      goto decode_success;
   }

   /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5C) {
      vassert(!has_66_pfx);
      delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "subsd", Iop_Sub64F0x2 );
      goto decode_success;
   }
11191
11192 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
11193 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
11194 /* These just appear to be special cases of SHUFPS */
11195 if (has_66_pfx && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
11196 IRTemp s1 = newTemp(Ity_I64);
11197 IRTemp s0 = newTemp(Ity_I64);
11198 IRTemp d1 = newTemp(Ity_I64);
11199 IRTemp d0 = newTemp(Ity_I64);
11200 IRTemp sV = newTemp(Ity_V128);
11201 IRTemp dV = newTemp(Ity_V128);
11202 Bool hi = toBool(insn[1] == 0x15);
11203
11204 modrm = insn[2];
11205 assign( dV, getXMMReg(gregOfRM(modrm)) );
11206
11207 if (epartIsReg(modrm)) {
11208 assign( sV, getXMMReg(eregOfRM(modrm)) );
11209 delta += 2+1;
11210 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
11211 nameXMMReg(eregOfRM(modrm)),
11212 nameXMMReg(gregOfRM(modrm)));
11213 } else {
11214 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11215 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11216 delta += 2+alen;
11217 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
11218 dis_buf,
11219 nameXMMReg(gregOfRM(modrm)));
11220 }
11221
11222 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
11223 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
11224 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
11225 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
11226
11227 if (hi) {
11228 putXMMReg( gregOfRM(modrm),
11229 binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
11230 } else {
11231 putXMMReg( gregOfRM(modrm),
11232 binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
11233 }
11234
11235 goto decode_success;
11236 }
11237
   /* 66 0F 57 = XORPD -- G = G xor E */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x57) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorpd", Iop_XorV128 );
      goto decode_success;
   }
11243
   /* 66 0F 6B = PACKSSDW */
   /* The final Bool argument of dis_SSEint_E_to_G says whether the
      operation's arguments must be presented in (E,G) order (True)
      or may be swapped (False, for commutative ops). */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x6B) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "packssdw",
                                 Iop_QNarrowBin32Sto16Sx8, True );
      goto decode_success;
   }

   /* 66 0F 63 = PACKSSWB */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x63) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "packsswb",
                                 Iop_QNarrowBin16Sto8Sx16, True );
      goto decode_success;
   }

   /* 66 0F 67 = PACKUSWB */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x67) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "packuswb",
                                 Iop_QNarrowBin16Sto8Ux16, True );
      goto decode_success;
   }

   /* 66 0F FC = PADDB */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xFC) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "paddb", Iop_Add8x16, False );
      goto decode_success;
   }

   /* 66 0F FE = PADDD */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xFE) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "paddd", Iop_Add32x4, False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
   /* 0F D4 = PADDQ -- add 64x1 */
   if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0xD4) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg (
                sorb, delta+2, insn[1], "paddq", False );
      goto decode_success;
   }

   /* 66 0F D4 = PADDQ */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xD4) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "paddq", Iop_Add64x2, False );
      goto decode_success;
   }

   /* 66 0F FD = PADDW */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xFD) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "paddw", Iop_Add16x8, False );
      goto decode_success;
   }

   /* 66 0F EC = PADDSB */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xEC) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "paddsb", Iop_QAdd8Sx16, False );
      goto decode_success;
   }

   /* 66 0F ED = PADDSW */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xED) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "paddsw", Iop_QAdd16Sx8, False );
      goto decode_success;
   }

   /* 66 0F DC = PADDUSB */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xDC) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "paddusb", Iop_QAdd8Ux16, False );
      goto decode_success;
   }

   /* 66 0F DD = PADDUSW */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xDD) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "paddusw", Iop_QAdd16Ux8, False );
      goto decode_success;
   }
11332
11333 /* 66 0F DB = PAND */
11334 if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xDB) {
11335 delta = dis_SSE_E_to_G_all( sorb, delta+2, "pand", Iop_AndV128 );
11336 goto decode_success;
11337 }
11338
11339 /* 66 0F DF = PANDN */
11340 if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xDF) {
11341 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "pandn", Iop_AndV128 );
11342 goto decode_success;
11343 }
11344
11345 /* 66 0F E0 = PAVGB */
11346 if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xE0) {
11347 delta = dis_SSEint_E_to_G( sorb, delta+2,
11348 "pavgb", Iop_Avg8Ux16, False );
11349 goto decode_success;
11350 }
11351
11352 /* 66 0F E3 = PAVGW */
11353 if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xE3) {
11354 delta = dis_SSEint_E_to_G( sorb, delta+2,
11355 "pavgw", Iop_Avg16Ux8, False );
11356 goto decode_success;
11357 }
11358
11359 /* 66 0F 74 = PCMPEQB */
11360 if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x74) {
11361 delta = dis_SSEint_E_to_G( sorb, delta+2,
11362 "pcmpeqb", Iop_CmpEQ8x16, False );
11363 goto decode_success;
11364 }
11365
11366 /* 66 0F 76 = PCMPEQD */
11367 if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x76) {
11368 delta = dis_SSEint_E_to_G( sorb, delta+2,
11369 "pcmpeqd", Iop_CmpEQ32x4, False );
11370 goto decode_success;
11371 }
11372
11373 /* 66 0F 75 = PCMPEQW */
11374 if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x75) {
11375 delta = dis_SSEint_E_to_G( sorb, delta+2,
11376 "pcmpeqw", Iop_CmpEQ16x8, False );
11377 goto decode_success;
11378 }
11379
11380 /* 66 0F 64 = PCMPGTB */
11381 if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x64) {
11382 delta = dis_SSEint_E_to_G( sorb, delta+2,
11383 "pcmpgtb", Iop_CmpGT8Sx16, False );
11384 goto decode_success;
11385 }
11386
11387 /* 66 0F 66 = PCMPGTD */
11388 if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x66) {
11389 delta = dis_SSEint_E_to_G( sorb, delta+2,
11390 "pcmpgtd", Iop_CmpGT32Sx4, False );
11391 goto decode_success;
11392 }
11393
11394 /* 66 0F 65 = PCMPGTW */
11395 if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x65) {
11396 delta = dis_SSEint_E_to_G( sorb, delta+2,
11397 "pcmpgtw", Iop_CmpGT16Sx8, False );
11398 goto decode_success;
11399 }
11400
   /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
      zero-extend of it in ireg(G). */
   /* Only the register-E form is handled here; the memory form (if
      reached) falls through to later decoders / decode failure.
      The xmm source is broken into four 32-bit temps t3..t0 and the
      3-bit immediate selects which 16-bit half of which temp. */
   if (insn[0] == 0x0F && insn[1] == 0xC5) {
      modrm = insn[2];
      if (has_66_pfx && epartIsReg(modrm)) {
         t5 = newTemp(Ity_V128);
         t4 = newTemp(Ity_I16);
         assign(t5, getXMMReg(eregOfRM(modrm)));
         breakup128to32s( t5, &t3, &t2, &t1, &t0 );
         switch (insn[3] & 7) {
            case 0:  assign(t4, unop(Iop_32to16,   mkexpr(t0))); break;
            case 1:  assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break;
            case 2:  assign(t4, unop(Iop_32to16,   mkexpr(t1))); break;
            case 3:  assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break;
            case 4:  assign(t4, unop(Iop_32to16,   mkexpr(t2))); break;
            case 5:  assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break;
            case 6:  assign(t4, unop(Iop_32to16,   mkexpr(t3))); break;
            case 7:  assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break;
            default: vassert(0); /*NOTREACHED*/
         }
         putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t4)));
         DIP("pextrw $%d,%s,%s\n",
             (Int)insn[3], nameXMMReg(eregOfRM(modrm)),
                           nameIReg(4,gregOfRM(modrm)));
         /* 0F C5 modrm imm8 = 4 bytes consumed */
         delta += 4;
         goto decode_success;
      }
      /* else fall through */
   }

   /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
      put it into the specified lane of xmm(G). */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xC4) {
      Int lane;
      t4 = newTemp(Ity_I16);
      modrm = insn[2];

      if (epartIsReg(modrm)) {
         assign(t4, getIReg(2, eregOfRM(modrm)));
         delta += 3+1;
         lane = insn[3+1-1];   /* == insn[3]: imm8 follows modrm */
         DIP("pinsrw $%d,%s,%s\n", lane,
                                   nameIReg(2,eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         delta += 3+alen;
         lane = insn[3+alen-1];   /* imm8 follows the addressing bytes */
         assign(t4, loadLE(Ity_I16, mkexpr(addr)));
         DIP("pinsrw $%d,%s,%s\n", lane,
                                   dis_buf,
                                   nameXMMReg(gregOfRM(modrm)));
      }

      /* Only the low 3 bits of the immediate select the lane. */
      putXMMRegLane16( gregOfRM(modrm), lane & 7, mkexpr(t4) );
      goto decode_success;
   }
11458
   /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
      E(xmm or mem) to G(xmm) */
   /* Implemented by splitting both 128-bit operands into 64-bit
      halves and evaluating each half with the MMX pmaddwd helper
      (a clean C call), then re-joining the two 64-bit results. */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xF5) {
      IRTemp s1V  = newTemp(Ity_V128);
      IRTemp s2V  = newTemp(Ity_V128);
      IRTemp dV   = newTemp(Ity_V128);
      IRTemp s1Hi = newTemp(Ity_I64);
      IRTemp s1Lo = newTemp(Ity_I64);
      IRTemp s2Hi = newTemp(Ity_I64);
      IRTemp s2Lo = newTemp(Ity_I64);
      IRTemp dHi  = newTemp(Ity_I64);
      IRTemp dLo  = newTemp(Ity_I64);
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( s1V, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("pmaddwd %s,%s\n", dis_buf,
                                nameXMMReg(gregOfRM(modrm)));
      }
      assign( s2V, getXMMReg(gregOfRM(modrm)) );
      assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
      assign( s1Lo, unop(Iop_V128to64,   mkexpr(s1V)) );
      assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
      assign( s2Lo, unop(Iop_V128to64,   mkexpr(s2V)) );
      assign( dHi, mkIRExprCCall(
                      Ity_I64, 0/*regparms*/,
                      "x86g_calculate_mmx_pmaddwd", 
                      &x86g_calculate_mmx_pmaddwd,
                      mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
                   ));
      assign( dLo, mkIRExprCCall(
                      Ity_I64, 0/*regparms*/,
                      "x86g_calculate_mmx_pmaddwd", 
                      &x86g_calculate_mmx_pmaddwd,
                      mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
                   ));
      assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
      putXMMReg(gregOfRM(modrm), mkexpr(dV));
      goto decode_success;
   }
11505
   /* 66 0F EE = PMAXSW -- 16x8 signed max */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xEE) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "pmaxsw", Iop_Max16Sx8, False );
      goto decode_success;
   }

   /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xDE) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "pmaxub", Iop_Max8Ux16, False );
      goto decode_success;
   }

   /* 66 0F EA = PMINSW -- 16x8 signed min */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xEA) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "pminsw", Iop_Min16Sx8, False );
      goto decode_success;
   }

   /* 66 0F DA = PMINUB -- 8x16 unsigned min */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xDA) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "pminub", Iop_Min8Ux16, False );
      goto decode_success;
   }

   /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes
      in xmm(E), turn them into a byte, and put zero-extend of it in
      ireg(G). */
   /* Register-E form only (PMOVMSKB has no memory form anyway).
      Each 64-bit half yields 8 MSBs via Iop_GetMSBs8x8; the two
      bytes are glued into a 16-bit value and zero-extended. */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xD7) {
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         t0 = newTemp(Ity_I64);
         t1 = newTemp(Ity_I64);
         assign(t0, getXMMRegLane64(eregOfRM(modrm), 0));
         assign(t1, getXMMRegLane64(eregOfRM(modrm), 1));
         t5 = newTemp(Ity_I32);
         assign(t5,
                unop(Iop_16Uto32,
                     binop(Iop_8HLto16,
                           unop(Iop_GetMSBs8x8, mkexpr(t1)),
                           unop(Iop_GetMSBs8x8, mkexpr(t0)))));
         putIReg(4, gregOfRM(modrm), mkexpr(t5));
         DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameIReg(4,gregOfRM(modrm)));
         delta += 3;
         goto decode_success;
      } 
      /* else fall through */
   }
11558
   /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xE4) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "pmulhuw", Iop_MulHi16Ux8, False );
      goto decode_success;
   }

   /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xE5) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "pmulhw", Iop_MulHi16Sx8, False );
      goto decode_success;
   }

   /* 66 0F D5 = PMULLW -- 16x8 multiply, low half of result */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xD5) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "pmullw", Iop_Mul16x8, False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
   /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
      0 to form 64-bit result */
   /* Only lane 0 of each 64-bit MMX operand participates:
      result = zext32(d[31:0]) * zext32(s[31:0]). */
   if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0xF4) {
      IRTemp sV = newTemp(Ity_I64);
      IRTemp dV = newTemp(Ity_I64);
      t1 = newTemp(Ity_I32);
      t0 = newTemp(Ity_I32);
      modrm = insn[2];

      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("pmuludq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 2+alen;
         DIP("pmuludq %s,%s\n", dis_buf,
                                nameMMXReg(gregOfRM(modrm)));
      }

      assign( t0, unop(Iop_64to32, mkexpr(dV)) );
      assign( t1, unop(Iop_64to32, mkexpr(sV)) );
      putMMXReg( gregOfRM(modrm),
                 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
      goto decode_success;
   }
11612
   /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
      0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
      half */
   /* This is a really poor translation -- could be improved if
      performance critical */
   /* Lanes 1 and 3 of both operands are ignored, per the
      architected behaviour of PMULUDQ. */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xF4) {
      IRTemp sV, dV;
      IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
      sV = newTemp(Ity_V128);
      dV = newTemp(Ity_V128);
      s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
      t1 = newTemp(Ity_I64);
      t0 = newTemp(Ity_I64);
      modrm = insn[2];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("pmuludq %s,%s\n", dis_buf,
                                nameXMMReg(gregOfRM(modrm)));
      }

      breakup128to32s( dV, &d3, &d2, &d1, &d0 );
      breakup128to32s( sV, &s3, &s2, &s1, &s0 );

      assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) );
      putXMMRegLane64( gregOfRM(modrm), 0, mkexpr(t0) );
      assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) );
      putXMMRegLane64( gregOfRM(modrm), 1, mkexpr(t1) );
      goto decode_success;
   }

   /* 66 0F EB = POR */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xEB) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "por", Iop_OrV128 );
      goto decode_success;
   }
11657
   /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
      from E(xmm or mem) to G(xmm) */
   /* Same half-splitting strategy as PMADDWD above: each 64-bit half
      is computed via a clean C call to the MMX psadbw helper, then
      the halves are recombined. */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xF6) {
      IRTemp s1V  = newTemp(Ity_V128);
      IRTemp s2V  = newTemp(Ity_V128);
      IRTemp dV   = newTemp(Ity_V128);
      IRTemp s1Hi = newTemp(Ity_I64);
      IRTemp s1Lo = newTemp(Ity_I64);
      IRTemp s2Hi = newTemp(Ity_I64);
      IRTemp s2Lo = newTemp(Ity_I64);
      IRTemp dHi  = newTemp(Ity_I64);
      IRTemp dLo  = newTemp(Ity_I64);
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( s1V, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("psadbw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                               nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("psadbw %s,%s\n", dis_buf,
                               nameXMMReg(gregOfRM(modrm)));
      }
      assign( s2V, getXMMReg(gregOfRM(modrm)) );
      assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
      assign( s1Lo, unop(Iop_V128to64,   mkexpr(s1V)) );
      assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
      assign( s2Lo, unop(Iop_V128to64,   mkexpr(s2V)) );
      assign( dHi, mkIRExprCCall(
                      Ity_I64, 0/*regparms*/,
                      "x86g_calculate_mmx_psadbw", 
                      &x86g_calculate_mmx_psadbw,
                      mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
                   ));
      assign( dLo, mkIRExprCCall(
                      Ity_I64, 0/*regparms*/,
                      "x86g_calculate_mmx_psadbw", 
                      &x86g_calculate_mmx_psadbw,
                      mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
                   ));
      assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
      putXMMReg(gregOfRM(modrm), mkexpr(dV));
      goto decode_success;
   }
11704
   /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
   /* The imm8 'order' holds four 2-bit source-lane selectors; SEL
      maps a selector to the corresponding 32-bit source temp. */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x70) {
      Int order;
      IRTemp sV, dV, s3, s2, s1, s0;
      s3 = s2 = s1 = s0 = IRTemp_INVALID;
      sV = newTemp(Ity_V128);
      dV = newTemp(Ity_V128);
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         order = (Int)insn[3];
         delta += 2+2;
         DIP("pshufd $%d,%s,%s\n", order, 
                                   nameXMMReg(eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         order = (Int)insn[2+alen];
         delta += 3+alen;
         DIP("pshufd $%d,%s,%s\n", order, 
                                   dis_buf,
                                   nameXMMReg(gregOfRM(modrm)));
      }
      breakup128to32s( sV, &s3, &s2, &s1, &s0 );

#     define SEL(n) \
                ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
      assign(dV,
             mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
                           SEL((order>>2)&3), SEL((order>>0)&3) )
      );
      putXMMReg(gregOfRM(modrm), mkexpr(dV));
#     undef SEL
      goto decode_success;
   }

   /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
      mem) to G(xmm), and copy lower half */
   /* Note: with an F3 prefix, insn[0] is the prefix byte itself, so
      all offsets are one larger than in the 66-prefixed cases. */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x70) {
      Int order;
      IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0;
      s3 = s2 = s1 = s0 = IRTemp_INVALID;
      sV   = newTemp(Ity_V128);
      dV   = newTemp(Ity_V128);
      sVhi = newTemp(Ity_I64);
      dVhi = newTemp(Ity_I64);
      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         order = (Int)insn[4];
         delta += 4+1;
         DIP("pshufhw $%d,%s,%s\n", order, 
                                    nameXMMReg(eregOfRM(modrm)),
                                    nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         order = (Int)insn[3+alen];
         delta += 4+alen;
         DIP("pshufhw $%d,%s,%s\n", order, 
                                    dis_buf,
                                    nameXMMReg(gregOfRM(modrm)));
      }
      assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) );
      breakup64to16s( sVhi, &s3, &s2, &s1, &s0 );

#     define SEL(n) \
                ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
      assign(dVhi,
             mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
                          SEL((order>>2)&3), SEL((order>>0)&3) )
      );
      /* shuffled high half; low half copied through unchanged */
      assign(dV, binop( Iop_64HLtoV128, 
                        mkexpr(dVhi),
                        unop(Iop_V128to64, mkexpr(sV))) );
      putXMMReg(gregOfRM(modrm), mkexpr(dV));
#     undef SEL
      goto decode_success;
   }

   /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
      mem) to G(xmm), and copy upper half */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x70) {
      Int order;
      IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0;
      s3 = s2 = s1 = s0 = IRTemp_INVALID;
      sV   = newTemp(Ity_V128);
      dV   = newTemp(Ity_V128);
      sVlo = newTemp(Ity_I64);
      dVlo = newTemp(Ity_I64);
      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         order = (Int)insn[4];
         delta += 4+1;
         DIP("pshuflw $%d,%s,%s\n", order, 
                                    nameXMMReg(eregOfRM(modrm)),
                                    nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         order = (Int)insn[3+alen];
         delta += 4+alen;
         DIP("pshuflw $%d,%s,%s\n", order, 
                                    dis_buf,
                                    nameXMMReg(gregOfRM(modrm)));
      }
      assign( sVlo, unop(Iop_V128to64, mkexpr(sV)) );
      breakup64to16s( sVlo, &s3, &s2, &s1, &s0 );

#     define SEL(n) \
                ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
      assign(dVlo,
             mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
                          SEL((order>>2)&3), SEL((order>>0)&3) )
      );
      /* high half copied through unchanged; shuffled low half */
      assign(dV, binop( Iop_64HLtoV128,
                        unop(Iop_V128HIto64, mkexpr(sV)),
                        mkexpr(dVlo) ) );
      putXMMReg(gregOfRM(modrm), mkexpr(dV));
#     undef SEL
      goto decode_success;
   }
11829
   /* 66 0F 72 /6 ib = PSLLD by immediate */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x72
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 6) {
      delta = dis_SSE_shiftE_imm( delta+2, "pslld", Iop_ShlN32x4 );
      goto decode_success;
   }

   /* 66 0F F2 = PSLLD by E */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xF2) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "pslld", Iop_ShlN32x4 );
      goto decode_success;
   }

   /* 66 0F 73 /7 ib = PSLLDQ by immediate */
   /* Whole-register byte shift left.  There is no 128-bit shift IROp
      here, so the register is split into two 64-bit halves and the
      shift is synthesised with 64-bit shifts and an OR, with special
      cases for shift counts of 0, exactly 8, more than 8, and >= 16
      bytes (result is all zeroes). */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x73
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 7) {
      IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
      Int    imm = (Int)insn[3];
      Int    reg = eregOfRM(insn[2]);
      DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
      vassert(imm >= 0 && imm <= 255);
      delta += 4;

      sV    = newTemp(Ity_V128);
      dV    = newTemp(Ity_V128);
      hi64  = newTemp(Ity_I64);
      lo64  = newTemp(Ity_I64);
      hi64r = newTemp(Ity_I64);
      lo64r = newTemp(Ity_I64);

      if (imm >= 16) {
         putXMMReg(reg, mkV128(0x0000));
         goto decode_success;
      }

      assign( sV, getXMMReg(reg) );
      assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );

      if (imm == 0) {
         assign( lo64r, mkexpr(lo64) );
         assign( hi64r, mkexpr(hi64) );
      }
      else
      if (imm == 8) {
         assign( lo64r, mkU64(0) );
         assign( hi64r, mkexpr(lo64) );
      }
      else 
      if (imm > 8) {
         assign( lo64r, mkU64(0) );
         assign( hi64r, binop( Iop_Shl64, 
                               mkexpr(lo64),
                               mkU8( 8*(imm-8) ) ));
      } else {
         /* 0 < imm < 8: bits shifted out of lo64 carry into hi64 */
         assign( lo64r, binop( Iop_Shl64, 
                               mkexpr(lo64),
                               mkU8(8 * imm) ));
         assign( hi64r, 
                 binop( Iop_Or64,
                        binop(Iop_Shl64, mkexpr(hi64), 
                                         mkU8(8 * imm)),
                        binop(Iop_Shr64, mkexpr(lo64),
                                         mkU8(8 * (8 - imm)) )
                      )
               );
      }
      assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
      putXMMReg(reg, mkexpr(dV));
      goto decode_success;
   }

   /* 66 0F 73 /6 ib = PSLLQ by immediate */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x73
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 6) {
      delta = dis_SSE_shiftE_imm( delta+2, "psllq", Iop_ShlN64x2 );
      goto decode_success;
   }

   /* 66 0F F3 = PSLLQ by E */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xF3) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllq", Iop_ShlN64x2 );
      goto decode_success;
   }

   /* 66 0F 71 /6 ib = PSLLW by immediate */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x71
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 6) {
      delta = dis_SSE_shiftE_imm( delta+2, "psllw", Iop_ShlN16x8 );
      goto decode_success;
   }

   /* 66 0F F1 = PSLLW by E */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xF1) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllw", Iop_ShlN16x8 );
      goto decode_success;
   }
11931
   /* 66 0F 72 /4 ib = PSRAD by immediate */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x72
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 4) {
      delta = dis_SSE_shiftE_imm( delta+2, "psrad", Iop_SarN32x4 );
      goto decode_success;
   }

   /* 66 0F E2 = PSRAD by E */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xE2) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrad", Iop_SarN32x4 );
      goto decode_success;
   }

   /* 66 0F 71 /4 ib = PSRAW by immediate */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x71
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 4) {
      delta = dis_SSE_shiftE_imm( delta+2, "psraw", Iop_SarN16x8 );
      goto decode_success;
   }

   /* 66 0F E1 = PSRAW by E */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xE1) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psraw", Iop_SarN16x8 );
      goto decode_success;
   }

   /* 66 0F 72 /2 ib = PSRLD by immediate */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x72
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 2) {
      delta = dis_SSE_shiftE_imm( delta+2, "psrld", Iop_ShrN32x4 );
      goto decode_success;
   }

   /* 66 0F D2 = PSRLD by E */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xD2) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrld", Iop_ShrN32x4 );
      goto decode_success;
   }

   /* 66 0F 73 /3 ib = PSRLDQ by immediate */
   /* Whole-register byte shift right -- mirror image of the PSLLDQ
      case above: split into 64-bit halves and synthesise with
      64-bit shifts / OR, with the same four shift-count cases. */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x73
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 3) {
      IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
      Int    imm = (Int)insn[3];
      Int    reg = eregOfRM(insn[2]);
      DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
      vassert(imm >= 0 && imm <= 255);
      delta += 4;

      sV    = newTemp(Ity_V128);
      dV    = newTemp(Ity_V128);
      hi64  = newTemp(Ity_I64);
      lo64  = newTemp(Ity_I64);
      hi64r = newTemp(Ity_I64);
      lo64r = newTemp(Ity_I64);

      if (imm >= 16) {
         putXMMReg(reg, mkV128(0x0000));
         goto decode_success;
      }

      assign( sV, getXMMReg(reg) );
      assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );

      if (imm == 0) {
         assign( lo64r, mkexpr(lo64) );
         assign( hi64r, mkexpr(hi64) );
      }
      else
      if (imm == 8) {
         assign( hi64r, mkU64(0) );
         assign( lo64r, mkexpr(hi64) );
      }
      else 
      if (imm > 8) {
         assign( hi64r, mkU64(0) );
         assign( lo64r, binop( Iop_Shr64, 
                               mkexpr(hi64),
                               mkU8( 8*(imm-8) ) ));
      } else {
         /* 0 < imm < 8: bits shifted out of hi64 carry into lo64 */
         assign( hi64r, binop( Iop_Shr64, 
                               mkexpr(hi64),
                               mkU8(8 * imm) ));
         assign( lo64r, 
                 binop( Iop_Or64,
                        binop(Iop_Shr64, mkexpr(lo64), 
                                         mkU8(8 * imm)),
                        binop(Iop_Shl64, mkexpr(hi64),
                                         mkU8(8 * (8 - imm)) )
                      )
               );
      }

      assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
      putXMMReg(reg, mkexpr(dV));
      goto decode_success;
   }

   /* 66 0F 73 /2 ib = PSRLQ by immediate */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x73
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 2) {
      delta = dis_SSE_shiftE_imm( delta+2, "psrlq", Iop_ShrN64x2 );
      goto decode_success;
   }

   /* 66 0F D3 = PSRLQ by E */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xD3) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlq", Iop_ShrN64x2 );
      goto decode_success;
   }

   /* 66 0F 71 /2 ib = PSRLW by immediate */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x71
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 2) {
      delta = dis_SSE_shiftE_imm( delta+2, "psrlw", Iop_ShrN16x8 );
      goto decode_success;
   }

   /* 66 0F D1 = PSRLW by E */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xD1) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlw", Iop_ShrN16x8 );
      goto decode_success;
   }
12062
   /* 66 0F F8 = PSUBB */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xF8) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "psubb", Iop_Sub8x16, False );
      goto decode_success;
   }

   /* 66 0F FA = PSUBD */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xFA) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "psubd", Iop_Sub32x4, False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
   /* 0F FB = PSUBQ -- sub 64x1 */
   if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0xFB) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg ( 
                sorb, delta+2, insn[1], "psubq", False );
      goto decode_success;
   }

   /* 66 0F FB = PSUBQ */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xFB) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "psubq", Iop_Sub64x2, False );
      goto decode_success;
   }

   /* 66 0F F9 = PSUBW */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xF9) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "psubw", Iop_Sub16x8, False );
      goto decode_success;
   }

   /* 66 0F E8 = PSUBSB -- signed saturating subtract */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xE8) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "psubsb", Iop_QSub8Sx16, False );
      goto decode_success;
   }

   /* 66 0F E9 = PSUBSW -- signed saturating subtract */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xE9) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "psubsw", Iop_QSub16Sx8, False );
      goto decode_success;
   }

   /* 66 0F D8 = PSUBUSB -- unsigned saturating subtract */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xD8) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "psubusb", Iop_QSub8Ux16, False );
      goto decode_success;
   }

   /* 66 0F D9 = PSUBUSW -- unsigned saturating subtract */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xD9) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "psubusw", Iop_QSub16Ux8, False );
      goto decode_success;
   }
12127
   /* PUNPCK{H,L}{BW,WD,DQ,QDQ} -- interleave high/low lanes of E and
      G into G.  The final True passed to dis_SSEint_E_to_G appears
      to select the operand ordering required by the interleave ops
      -- NOTE(review): confirm against the helper's definition. */

   /* 66 0F 68 = PUNPCKHBW */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x68) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpckhbw",
                                 Iop_InterleaveHI8x16, True );
      goto decode_success;
   }

   /* 66 0F 6A = PUNPCKHDQ */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x6A) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpckhdq",
                                 Iop_InterleaveHI32x4, True );
      goto decode_success;
   }

   /* 66 0F 6D = PUNPCKHQDQ */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x6D) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpckhqdq",
                                 Iop_InterleaveHI64x2, True );
      goto decode_success;
   }

   /* 66 0F 69 = PUNPCKHWD */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x69) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpckhwd",
                                 Iop_InterleaveHI16x8, True );
      goto decode_success;
   }

   /* 66 0F 60 = PUNPCKLBW */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x60) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpcklbw",
                                 Iop_InterleaveLO8x16, True );
      goto decode_success;
   }

   /* 66 0F 62 = PUNPCKLDQ */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x62) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpckldq",
                                 Iop_InterleaveLO32x4, True );
      goto decode_success;
   }

   /* 66 0F 6C = PUNPCKLQDQ */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x6C) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpcklqdq",
                                 Iop_InterleaveLO64x2, True );
      goto decode_success;
   }

   /* 66 0F 61 = PUNPCKLWD */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0x61) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpcklwd",
                                 Iop_InterleaveLO16x8, True );
      goto decode_success;
   }

   /* 66 0F EF = PXOR */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xEF) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "pxor", Iop_XorV128 );
      goto decode_success;
   }
12197
//-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */
//-- (dead code retained from the old (pre-IR) Valgrind decoder;
//--  FXSAVE/FXRSTOR are handled elsewhere, if at all)
//-- if (insn[0] == 0x0F && insn[1] == 0xAE 
//--     && (!epartIsReg(insn[2]))
//--     && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) {
//--    Bool store = gregOfRM(insn[2]) == 0;
//--    vg_assert(!has_66_pfx);
//--    pair = disAMode ( cb, sorb, eip+2, dis_buf );
//--    t1   = LOW24(pair);
//--    eip += 2+HI8(pair);
//--    uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512,
//--                Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1],
//--                Lit16, (UShort)insn[2],
//--                TempReg, t1 );
//--    DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf );
//--    goto decode_success;
//-- }

   /* 0F AE /7 = CLFLUSH -- flush cache line */
   /* Modelled as a self-modifying-code style invalidation: write the
      affected [start,len) range to the CMSTART/CMLEN guest-state
      fields and exit the block with Ijk_InvalICache so cached
      translations covering that range get discarded. */
   if (!has_66_pfx && insn[0] == 0x0F && insn[1] == 0xAE
       && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {

      /* This is something of a hack.  We need to know the size of the
         cache line containing addr.  Since we don't (easily), assume
         256 on the basis that no real cache would have a line that
         big.  It's safe to invalidate more stuff than we need, just
         inefficient. */
      UInt lineszB = 256;

      addr = disAMode ( &alen, sorb, delta+2, dis_buf );
      delta += 2+alen;

      /* Round addr down to the start of the containing block. */
      stmt( IRStmt_Put(
               OFFB_CMSTART,
               binop( Iop_And32, 
                      mkexpr(addr), 
                      mkU32( ~(lineszB-1) ))) );

      stmt( IRStmt_Put(OFFB_CMLEN, mkU32(lineszB) ) );

      jmp_lit(&dres, Ijk_InvalICache, (Addr32)(guest_EIP_bbstart+delta));

      DIP("clflush %s\n", dis_buf);
      goto decode_success;
   }
12243
   /* ---------------------------------------------------- */
   /* --- end of the SSE2 decoder.                     --- */
   /* ---------------------------------------------------- */

   /* ---------------------------------------------------- */
   /* --- start of the SSE3 decoder.                   --- */
   /* ---------------------------------------------------- */

   /* Skip parts of the decoder which don't apply given the stated
      guest subarchitecture. */
   if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3))
      goto after_sse_decoders; /* no SSE3 capabilities */

   /* Re-read the insn pointer: delta may have been advanced above. */
   insn = &guest_code[delta];
12258
   /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
      duplicating some lanes (2:2:0:0). */
   /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
      duplicating some lanes (3:3:1:1). */
   if (!has_66_pfx && insn[0] == 0xF3 && insn[1] == 0x0F
       && (insn[2] == 0x12 || insn[2] == 0x16)) {
      IRTemp s3, s2, s1, s0;
      IRTemp sV = newTemp(Ity_V128);
      Bool isH = insn[2] == 0x16;  /* True -> MOVSHDUP, False -> MOVSLDUP */
      s3 = s2 = s1 = s0 = IRTemp_INVALID;

      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg( eregOfRM(modrm)) );
         DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
                                  nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         /* Memory form requires a 16-aligned source. */
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
                                  dis_buf,
                                  nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      /* Duplicate the odd (H) or even (L) 32-bit source lanes. */
      breakup128to32s( sV, &s3, &s2, &s1, &s0 );
      putXMMReg( gregOfRM(modrm),
                 isH ? mk128from32s( s3, s3, s1, s1 )
                     : mk128from32s( s2, s2, s0, s0 ) );
      goto decode_success;
   }
12293
   /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
      duplicating some lanes (0:1:0:1). */
   if (!has_66_pfx && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x12) {
      IRTemp sV = newTemp(Ity_V128);
      IRTemp d0 = newTemp(Ity_I64);  /* low 64 bits of the source */

      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg( eregOfRM(modrm)) );
         DIP("movddup %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
         assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
      } else {
         /* Only 8 bytes are read from memory, so no 16-alignment
            check is needed here (unlike most SSE loads). */
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movddup %s,%s\n", dis_buf,
                                nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      /* Duplicate the low 64-bit lane into both halves of G. */
      putXMMReg( gregOfRM(modrm), binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
      goto decode_success;
   }
12318
   /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
   if (!has_66_pfx && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD0) {
      IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
      IRTemp eV   = newTemp(Ity_V128);
      IRTemp gV   = newTemp(Ity_V128);
      IRTemp addV = newTemp(Ity_V128);
      IRTemp subV = newTemp(Ity_V128);
      IRTemp rm     = newTemp(Ity_I32);
      a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( eV, getXMMReg( eregOfRM(modrm)) );
         DIP("addsubps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("addsubps %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      /* Compute both G+E and G-E, then select lanes from each. */
      assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
      assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );
      assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );

      breakup128to32s( addV, &a3, &a2, &a1, &a0 );
      breakup128to32s( subV, &s3, &s2, &s1, &s0 );

      /* Lanes 3 and 1 take the sum; lanes 2 and 0 take the difference. */
      putXMMReg( gregOfRM(modrm), mk128from32s( a3, s2, a1, s0 ));
      goto decode_success;
   }
12355
   /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
   if (has_66_pfx && insn[0] == 0x0F && insn[1] == 0xD0) {
      IRTemp eV   = newTemp(Ity_V128);
      IRTemp gV   = newTemp(Ity_V128);
      IRTemp addV = newTemp(Ity_V128);
      IRTemp subV = newTemp(Ity_V128);
      IRTemp a1     = newTemp(Ity_I64);
      IRTemp s0     = newTemp(Ity_I64);
      IRTemp rm     = newTemp(Ity_I32);

      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( eV, getXMMReg( eregOfRM(modrm)) );
         DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 2+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("addsubpd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 2+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      /* Compute both G+E and G-E, then select one lane from each. */
      assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
      assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );
      assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );

      assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
      assign( s0, unop(Iop_V128to64,   mkexpr(subV) ));

      /* High lane takes the sum, low lane the difference. */
      putXMMReg( gregOfRM(modrm),
                 binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
      goto decode_success;
   }
12393
   /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
   /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
   if (!has_66_pfx && insn[0] == 0xF2 && insn[1] == 0x0F
       && (insn[2] == 0x7C || insn[2] == 0x7D)) {
      IRTemp e3, e2, e1, e0, g3, g2, g1, g0;
      IRTemp eV     = newTemp(Ity_V128);
      IRTemp gV     = newTemp(Ity_V128);
      IRTemp leftV  = newTemp(Ity_V128);
      IRTemp rightV = newTemp(Ity_V128);
      IRTemp rm     = newTemp(Ity_I32);
      Bool   isAdd  = insn[2] == 0x7C;
      const HChar* str = isAdd ? "add" : "sub";
      e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID;

      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( eV, getXMMReg( eregOfRM(modrm)) );
         DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("h%sps %s,%s\n", str, dis_buf,
                                   nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      /* Gather the even-numbered lanes into leftV and the odd-numbered
         lanes into rightV, so a single vertical op does the horizontal
         pairwise add/sub. */
      breakup128to32s( eV, &e3, &e2, &e1, &e0 );
      breakup128to32s( gV, &g3, &g2, &g1, &g0 );

      assign( leftV,  mk128from32s( e2, e0, g2, g0 ) );
      assign( rightV, mk128from32s( e3, e1, g3, g1 ) );

      assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
      putXMMReg( gregOfRM(modrm),
                 triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
                       mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
      goto decode_success;
   }
12436
   /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
   /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
   if (has_66_pfx && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) {
      IRTemp e1     = newTemp(Ity_I64);
      IRTemp e0     = newTemp(Ity_I64);
      IRTemp g1     = newTemp(Ity_I64);
      IRTemp g0     = newTemp(Ity_I64);
      IRTemp eV     = newTemp(Ity_V128);
      IRTemp gV     = newTemp(Ity_V128);
      IRTemp leftV  = newTemp(Ity_V128);
      IRTemp rightV = newTemp(Ity_V128);
      IRTemp rm     = newTemp(Ity_I32);
      Bool   isAdd  = insn[1] == 0x7C;
      const HChar* str = isAdd ? "add" : "sub";

      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( eV, getXMMReg( eregOfRM(modrm)) );
         DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
         delta += 2+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("h%spd %s,%s\n", str, dis_buf,
                              nameXMMReg(gregOfRM(modrm)));
         delta += 2+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      /* Regroup lanes so a single vertical op computes the horizontal
         result: low lanes of E and G vs high lanes of E and G. */
      assign( e1, unop(Iop_V128HIto64, mkexpr(eV) ));
      assign( e0, unop(Iop_V128to64, mkexpr(eV) ));
      assign( g1, unop(Iop_V128HIto64, mkexpr(gV) ));
      assign( g0, unop(Iop_V128to64, mkexpr(gV) ));

      assign( leftV,  binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) );
      assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) );

      assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
      putXMMReg( gregOfRM(modrm),
                 triop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
                       mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
      goto decode_success;
   }
12482
   /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
   if (!has_66_pfx && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xF0) {
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         /* LDDQU has no register-source form. */
         goto decode_failure;
      } else {
         /* Unlike MOVDQA etc., LDDQU is specified to tolerate an
            unaligned source, so there is deliberately no alignment
            check here. */
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         putXMMReg( gregOfRM(modrm),
                    loadLE(Ity_V128, mkexpr(addr)) );
         DIP("lddqu %s,%s\n", dis_buf,
                              nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }
      goto decode_success;
   }
12498
12499 /* ---------------------------------------------------- */
12500 /* --- end of the SSE3 decoder. --- */
12501 /* ---------------------------------------------------- */
12502
12503 /* ---------------------------------------------------- */
12504 /* --- start of the SSSE3 decoder. --- */
12505 /* ---------------------------------------------------- */
12506
   /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
      Unsigned Bytes (MMX) */
   if (!has_66_pfx
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
      IRTemp sV        = newTemp(Ity_I64);
      IRTemp dV        = newTemp(Ity_I64);
      IRTemp sVoddsSX  = newTemp(Ity_I64);  /* odd bytes of sV, sign-extended to 16 */
      IRTemp sVevensSX = newTemp(Ity_I64);  /* even bytes of sV, sign-extended to 16 */
      IRTemp dVoddsZX  = newTemp(Ity_I64);  /* odd bytes of dV, zero-extended to 16 */
      IRTemp dVevensZX = newTemp(Ity_I64);  /* even bytes of dV, zero-extended to 16 */

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pmaddubsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                  nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pmaddubsw %s,%s\n", dis_buf,
                                  nameMMXReg(gregOfRM(modrm)));
      }

      /* compute dV unsigned x sV signed.  The odd/even byte lanes are
         widened to 16 bits by arithmetic (signed) or logical (unsigned)
         shift pairs. */
      assign( sVoddsSX,
              binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
      assign( sVevensSX,
              binop(Iop_SarN16x4,
                    binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
                    mkU8(8)) );
      assign( dVoddsZX,
              binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
      assign( dVevensZX,
              binop(Iop_ShrN16x4,
                    binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
                    mkU8(8)) );

      /* Adjacent 16-bit products are combined with signed saturation. */
      putMMXReg(
         gregOfRM(modrm),
         binop(Iop_QAdd16Sx4,
               binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
               binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
         )
      );
      goto decode_success;
   }
12558
   /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
      Unsigned Bytes (XMM) */
   if (has_66_pfx
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
      IRTemp sV        = newTemp(Ity_V128);
      IRTemp dV        = newTemp(Ity_V128);
      IRTemp sVoddsSX  = newTemp(Ity_V128);  /* odd bytes of sV, sign-extended to 16 */
      IRTemp sVevensSX = newTemp(Ity_V128);  /* even bytes of sV, sign-extended to 16 */
      IRTemp dVoddsZX  = newTemp(Ity_V128);  /* odd bytes of dV, zero-extended to 16 */
      IRTemp dVevensZX = newTemp(Ity_V128);  /* even bytes of dV, zero-extended to 16 */

      modrm = insn[3];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pmaddubsw %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRM(modrm)));
      }

      /* compute dV unsigned x sV signed.  The odd/even byte lanes are
         widened to 16 bits by arithmetic (signed) or logical (unsigned)
         shift pairs. */
      assign( sVoddsSX,
              binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
      assign( sVevensSX,
              binop(Iop_SarN16x8,
                    binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)),
                    mkU8(8)) );
      assign( dVoddsZX,
              binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
      assign( dVevensZX,
              binop(Iop_ShrN16x8,
                    binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
                    mkU8(8)) );

      /* Adjacent 16-bit products are combined with signed saturation. */
      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_QAdd16Sx8,
               binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
               binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
         )
      );
      goto decode_success;
   }
12610
   /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
   /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
      mmx) and G to G (mmx). */
   /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
      mmx) and G to G (mmx). */
   /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
      to G (mmx). */
   /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
      to G (mmx). */
   /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
      to G (mmx). */
   /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
      to G (mmx). */

   if (!has_66_pfx
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
           || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
      const HChar* str = "???";
      IROp   opV64  = Iop_INVALID;
      /* Lane-regrouping ops; defaults suit the 16x4 variants and are
         overridden below for the 32x2 (D) variants. */
      IROp   opCatO = Iop_CatOddLanes16x4;
      IROp   opCatE = Iop_CatEvenLanes16x4;
      IRTemp sV     = newTemp(Ity_I64);
      IRTemp dV     = newTemp(Ity_I64);

      modrm = insn[3];

      switch (insn[2]) {
         case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
         case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
         case 0x01: opV64 = Iop_Add16x4;   str = "addw";  break;
         case 0x05: opV64 = Iop_Sub16x4;   str = "subw";  break;
         case 0x02: opV64 = Iop_Add32x2;   str = "addd";  break;
         case 0x06: opV64 = Iop_Sub32x2;   str = "subd";  break;
         default: vassert(0);
      }
      if (insn[2] == 0x02 || insn[2] == 0x06) {
         opCatO = Iop_InterleaveHI32x2;
         opCatE = Iop_InterleaveLO32x2;
      }

      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("ph%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
                                  nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("ph%s %s,%s\n", str, dis_buf,
                                  nameMMXReg(gregOfRM(modrm)));
      }

      /* Regroup even lanes vs odd lanes, then do one vertical op to
         get the horizontal result. */
      putMMXReg(
         gregOfRM(modrm),
         binop(opV64,
               binop(opCatE,mkexpr(sV),mkexpr(dV)),
               binop(opCatO,mkexpr(sV),mkexpr(dV))
         )
      );
      goto decode_success;
   }
12677
   /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
      xmm) and G to G (xmm). */
   /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
      xmm) and G to G (xmm). */
   /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
      G to G (xmm). */
   /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
      G to G (xmm). */
   /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
      G to G (xmm). */
   /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
      G to G (xmm). */

   if (has_66_pfx
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
           || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
      const HChar* str = "???";
      IROp   opV64  = Iop_INVALID;
      /* The 128-bit op is synthesised from 64-bit ops on the two
         halves; defaults suit the 16-bit variants and are overridden
         below for the 32-bit (D) variants. */
      IROp   opCatO = Iop_CatOddLanes16x4;
      IROp   opCatE = Iop_CatEvenLanes16x4;
      IRTemp sV     = newTemp(Ity_V128);
      IRTemp dV     = newTemp(Ity_V128);
      IRTemp sHi    = newTemp(Ity_I64);
      IRTemp sLo    = newTemp(Ity_I64);
      IRTemp dHi    = newTemp(Ity_I64);
      IRTemp dLo    = newTemp(Ity_I64);

      modrm = insn[3];

      switch (insn[2]) {
         case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
         case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
         case 0x01: opV64 = Iop_Add16x4;   str = "addw";  break;
         case 0x05: opV64 = Iop_Sub16x4;   str = "subw";  break;
         case 0x02: opV64 = Iop_Add32x2;   str = "addd";  break;
         case 0x06: opV64 = Iop_Sub32x2;   str = "subd";  break;
         default: vassert(0);
      }
      if (insn[2] == 0x02 || insn[2] == 0x06) {
         opCatO = Iop_InterleaveHI32x2;
         opCatE = Iop_InterleaveLO32x2;
      }

      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg( eregOfRM(modrm)) );
         DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("ph%s %s,%s\n", str, dis_buf,
                                  nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      /* This isn't a particularly efficient way to compute the
         result, but at least it avoids a proliferation of IROps,
         hence avoids complicating all the backends.  The low 64 bits
         come from the horizontal op over G, the high 64 from the
         horizontal op over E. */
      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               binop(opV64,
                     binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
                     binop(opCatO,mkexpr(sHi),mkexpr(sLo))
               ),
               binop(opV64,
                     binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
                     binop(opCatO,mkexpr(dHi),mkexpr(dLo))
               )
         )
      );
      goto decode_success;
   }
12761
   /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
      (MMX) */
   if (!has_66_pfx
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
      IRTemp sV = newTemp(Ity_I64);
      IRTemp dV = newTemp(Ity_I64);

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pmulhrsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                 nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pmulhrsw %s,%s\n", dis_buf,
                                 nameMMXReg(gregOfRM(modrm)));
      }

      /* The multiply/round/scale per 16-bit lane is factored into a
         shared helper, also used by the XMM variant below. */
      putMMXReg(
         gregOfRM(modrm),
         dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
      );
      goto decode_success;
   }
12792
   /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
      Scale (XMM) */
   if (has_66_pfx
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
      IRTemp sV  = newTemp(Ity_V128);
      IRTemp dV  = newTemp(Ity_V128);
      IRTemp sHi = newTemp(Ity_I64);
      IRTemp sLo = newTemp(Ity_I64);
      IRTemp dHi = newTemp(Ity_I64);
      IRTemp dLo = newTemp(Ity_I64);

      modrm = insn[3];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pmulhrsw %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
      }

      /* Split into 64-bit halves and reuse the 64-bit helper on each. */
      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
               dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
         )
      );
      goto decode_success;
   }
12835
   /* 0F 38 08 = PSIGNB -- Packed Sign 8x8  (MMX) */
   /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
   /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
   if (!has_66_pfx
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
      IRTemp sV      = newTemp(Ity_I64);
      IRTemp dV      = newTemp(Ity_I64);
      const HChar* str = "???";
      Int    laneszB = 0;   /* lane width in bytes, selects helper behaviour */

      switch (insn[2]) {
         case 0x08: laneszB = 1; str = "b"; break;
         case 0x09: laneszB = 2; str = "w"; break;
         case 0x0A: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("psign%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
                                     nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("psign%s %s,%s\n", str, dis_buf,
                                     nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
      );
      goto decode_success;
   }
12877
   /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
   /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
   /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
   if (has_66_pfx
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
      IRTemp sV      = newTemp(Ity_V128);
      IRTemp dV      = newTemp(Ity_V128);
      IRTemp sHi     = newTemp(Ity_I64);
      IRTemp sLo     = newTemp(Ity_I64);
      IRTemp dHi     = newTemp(Ity_I64);
      IRTemp dLo     = newTemp(Ity_I64);
      const HChar* str = "???";
      Int    laneszB = 0;   /* lane width in bytes, selects helper behaviour */

      switch (insn[2]) {
         case 0x08: laneszB = 1; str = "b"; break;
         case 0x09: laneszB = 2; str = "w"; break;
         case 0x0A: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = insn[3];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                     nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("psign%s %s,%s\n", str, dis_buf,
                                     nameXMMReg(gregOfRM(modrm)));
      }

      /* Split into 64-bit halves and reuse the 64-bit helper on each. */
      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
               dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
         )
      );
      goto decode_success;
   }
12931
   /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8  (MMX) */
   /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
   /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
   if (!has_66_pfx
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
      /* Unary op: only a source is read; G is written unconditionally. */
      IRTemp sV      = newTemp(Ity_I64);
      const HChar* str = "???";
      Int    laneszB = 0;   /* lane width in bytes, selects helper behaviour */

      switch (insn[2]) {
         case 0x1C: laneszB = 1; str = "b"; break;
         case 0x1D: laneszB = 2; str = "w"; break;
         case 0x1E: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = insn[3];
      do_MMX_preamble();

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pabs%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
                                    nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pabs%s %s,%s\n", str, dis_buf,
                                    nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         dis_PABS_helper( mkexpr(sV), laneszB )
      );
      goto decode_success;
   }
12971
   /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
   /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
   /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
   if (has_66_pfx
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
      /* Unary op: only a source is read; G is written unconditionally. */
      IRTemp sV      = newTemp(Ity_V128);
      IRTemp sHi     = newTemp(Ity_I64);
      IRTemp sLo     = newTemp(Ity_I64);
      const HChar* str = "???";
      Int    laneszB = 0;   /* lane width in bytes, selects helper behaviour */

      switch (insn[2]) {
         case 0x1C: laneszB = 1; str = "b"; break;
         case 0x1D: laneszB = 2; str = "w"; break;
         case 0x1E: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = insn[3];

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                    nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pabs%s %s,%s\n", str, dis_buf,
                                    nameXMMReg(gregOfRM(modrm)));
      }

      /* Split into 64-bit halves and reuse the 64-bit helper on each. */
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               dis_PABS_helper( mkexpr(sHi), laneszB ),
               dis_PABS_helper( mkexpr(sLo), laneszB )
         )
      );
      goto decode_success;
   }
13019
13020 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
13021 if (!has_66_pfx
13022 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
13023 IRTemp sV = newTemp(Ity_I64);
13024 IRTemp dV = newTemp(Ity_I64);
13025 IRTemp res = newTemp(Ity_I64);
13026
13027 modrm = insn[3];
13028 do_MMX_preamble();
13029 assign( dV, getMMXReg(gregOfRM(modrm)) );
13030
13031 if (epartIsReg(modrm)) {
13032 assign( sV, getMMXReg(eregOfRM(modrm)) );
13033 d32 = (UInt)insn[3+1];
13034 delta += 3+1+1;
13035 DIP("palignr $%u,%s,%s\n", d32,
13036 nameMMXReg(eregOfRM(modrm)),
13037 nameMMXReg(gregOfRM(modrm)));
13038 } else {
13039 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
13040 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13041 d32 = (UInt)insn[3+alen];
13042 delta += 3+alen+1;
13043 DIP("palignr $%u%s,%s\n", d32,
13044 dis_buf,
13045 nameMMXReg(gregOfRM(modrm)));
13046 }
13047
13048 if (d32 == 0) {
13049 assign( res, mkexpr(sV) );
13050 }
13051 else if (d32 >= 1 && d32 <= 7) {
13052 assign(res,
13053 binop(Iop_Or64,
13054 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d32)),
13055 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d32))
13056 )));
13057 }
13058 else if (d32 == 8) {
13059 assign( res, mkexpr(dV) );
13060 }
13061 else if (d32 >= 9 && d32 <= 15) {
13062 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d32-8))) );
13063 }
13064 else if (d32 >= 16 && d32 <= 255) {
13065 assign( res, mkU64(0) );
13066 }
13067 else
13068 vassert(0);
13069
13070 putMMXReg( gregOfRM(modrm), mkexpr(res) );
13071 goto decode_success;
13072 }
13073
   /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM).  Conceptually
      the result is (dV:sV) >> (8*imm8), truncated to 128 bits, where
      dV occupies the high half of the 256-bit concatenation.  The
      computation is done on 64-bit quarters dHi:dLo:sHi:sLo. */
   if (has_66_pfx
       && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
      IRTemp sV  = newTemp(Ity_V128);
      IRTemp dV  = newTemp(Ity_V128);
      IRTemp sHi = newTemp(Ity_I64);
      IRTemp sLo = newTemp(Ity_I64);
      IRTemp dHi = newTemp(Ity_I64);
      IRTemp dLo = newTemp(Ity_I64);
      IRTemp rHi = newTemp(Ity_I64);
      IRTemp rLo = newTemp(Ity_I64);

      modrm = insn[3];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         d32 = (UInt)insn[3+1];
         delta += 3+1+1;
         DIP("palignr $%u,%s,%s\n", d32,
                                    nameXMMReg(eregOfRM(modrm)),
                                    nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         d32 = (UInt)insn[3+alen];
         delta += 3+alen+1;
         DIP("palignr $%u,%s,%s\n", d32,
                                    dis_buf,
                                    nameXMMReg(gregOfRM(modrm)));
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      /* Handle the shift amount case by case; d32 is an 8-bit
         immediate, so 0..255 covers all possibilities.  Each 64-bit
         half of the result is either a whole input quarter, a
         combination of two adjacent quarters (via the helper), a
         shifted single quarter, or zero. */
      if (d32 == 0) {
         assign( rHi, mkexpr(sHi) );
         assign( rLo, mkexpr(sLo) );
      }
      else if (d32 >= 1 && d32 <= 7) {
         assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d32) );
         assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d32) );
      }
      else if (d32 == 8) {
         assign( rHi, mkexpr(dLo) );
         assign( rLo, mkexpr(sHi) );
      }
      else if (d32 >= 9 && d32 <= 15) {
         assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d32-8) );
         assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d32-8) );
      }
      else if (d32 == 16) {
         assign( rHi, mkexpr(dHi) );
         assign( rLo, mkexpr(dLo) );
      }
      else if (d32 >= 17 && d32 <= 23) {
         assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-16))) );
         assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d32-16) );
      }
      else if (d32 == 24) {
         assign( rHi, mkU64(0) );
         assign( rLo, mkexpr(dHi) );
      }
      else if (d32 >= 25 && d32 <= 31) {
         assign( rHi, mkU64(0) );
         assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-24))) );
      }
      else if (d32 >= 32 && d32 <= 255) {
         /* Shifted entirely out: result is zero. */
         assign( rHi, mkU64(0) );
         assign( rLo, mkU64(0) );
      }
      else 
         vassert(0);

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
      );
      goto decode_success;
   }
13157
   /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX).  Each byte
      of sV selects a byte of dV: bits 2:0 give the source lane, and
      bit 7 set forces the result lane to zero. */
   if (!has_66_pfx
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
      IRTemp sV      = newTemp(Ity_I64);
      IRTemp dV      = newTemp(Ity_I64);

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pshufb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                               nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pshufb %s,%s\n", dis_buf,
                               nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         binop(
            Iop_And64,
            /* permute the lanes (only index bits 2:0 are relevant) */
            binop(
               Iop_Perm8x8,
               mkexpr(dV),
               binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
            ),
            /* mask off lanes which have (index & 0x80) == 0x80 */
            unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
         )
      );
      goto decode_success;
   }
13197
   /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
   /* Implemented by splitting the 128-bit operands into 64-bit halves
      and doing two Perm8x8 selections per half: index bit 3 of each
      src byte selects between the d-operand's high and low halves,
      bits 0..2 select the byte within that half, and bit 7 set forces
      the result lane to zero. */
   if (has_66_pfx
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
      IRTemp sV = newTemp(Ity_V128);
      IRTemp dV = newTemp(Ity_V128);
      IRTemp sHi = newTemp(Ity_I64);
      IRTemp sLo = newTemp(Ity_I64);
      IRTemp dHi = newTemp(Ity_I64);
      IRTemp dLo = newTemp(Ity_I64);
      IRTemp rHi = newTemp(Ity_I64);
      IRTemp rLo = newTemp(Ity_I64);
      IRTemp sevens = newTemp(Ity_I64);
      IRTemp mask0x80hi = newTemp(Ity_I64);
      IRTemp mask0x80lo = newTemp(Ity_I64);
      IRTemp maskBit3hi = newTemp(Ity_I64);
      IRTemp maskBit3lo = newTemp(Ity_I64);
      IRTemp sAnd7hi = newTemp(Ity_I64);
      IRTemp sAnd7lo = newTemp(Ity_I64);
      IRTemp permdHi = newTemp(Ity_I64);
      IRTemp permdLo = newTemp(Ity_I64);

      modrm = insn[3];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pshufb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                               nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         /* Memory operands of SSE insns must be 16-aligned. */
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pshufb %s,%s\n", dis_buf,
                               nameXMMReg(gregOfRM(modrm)));
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );

      assign( sevens, mkU64(0x0707070707070707ULL) );

      /*
         mask0x80hi = Not(SarN8x8(sHi,7))
         maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
         sAnd7hi    = And(sHi,sevens)
         permdHi    = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
                          And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
         rHi        = And(permdHi,mask0x80hi)
      */
      /* 0xFF in lanes whose index byte has bit 7 clear (kept),
         0x00 where bit 7 is set (zeroed). */
      assign(
         mask0x80hi,
         unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));

      /* Shl 4 then Sar 7 replicates index bit 3 across each byte:
         0xFF selects the dHi half, 0x00 selects dLo. */
      assign(
         maskBit3hi,
         binop(Iop_SarN8x8,
               binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
               mkU8(7)));

      assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));

      assign(
         permdHi,
         binop(
            Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
                  mkexpr(maskBit3hi)),
            binop(Iop_And64,
                  binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
                  unop(Iop_Not64,mkexpr(maskBit3hi))) ));

      assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );

      /* And the same for the lower half of the result.  What fun. */

      assign(
         mask0x80lo,
         unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));

      assign(
         maskBit3lo,
         binop(Iop_SarN8x8,
               binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
               mkU8(7)));

      assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));

      assign(
         permdLo,
         binop(
            Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
                  mkexpr(maskBit3lo)),
            binop(Iop_And64,
                  binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
                  unop(Iop_Not64,mkexpr(maskBit3lo))) ));

      assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
      );
      goto decode_success;
   }
13309
13310 /* 0F 38 F0 = MOVBE m16/32(E), r16/32(G) */
13311 /* 0F 38 F1 = MOVBE r16/32(G), m16/32(E) */
13312 if ((sz == 2 || sz == 4)
13313 && insn[0] == 0x0F && insn[1] == 0x38
13314 && (insn[2] == 0xF0 || insn[2] == 0xF1)
13315 && !epartIsReg(insn[3])) {
13316
13317 modrm = insn[3];
13318 addr = disAMode(&alen, sorb, delta + 3, dis_buf);
13319 delta += 3 + alen;
13320 ty = szToITy(sz);
13321 IRTemp src = newTemp(ty);
13322
13323 if (insn[2] == 0xF0) { /* LOAD */
13324 assign(src, loadLE(ty, mkexpr(addr)));
13325 IRTemp dst = math_BSWAP(src, ty);
13326 putIReg(sz, gregOfRM(modrm), mkexpr(dst));
13327 DIP("movbe %s,%s\n", dis_buf, nameIReg(sz, gregOfRM(modrm)));
13328 } else { /* STORE */
13329 assign(src, getIReg(sz, gregOfRM(modrm)));
13330 IRTemp dst = math_BSWAP(src, ty);
13331 storeLE(mkexpr(addr), mkexpr(dst));
13332 DIP("movbe %s,%s\n", nameIReg(sz, gregOfRM(modrm)), dis_buf);
13333 }
13334 goto decode_success;
13335 }
13336
13337 /* ---------------------------------------------------- */
13338 /* --- end of the SSSE3 decoder. --- */
13339 /* ---------------------------------------------------- */
13340
13341 /* ---------------------------------------------------- */
13342 /* --- start of the SSE4 decoder --- */
13343 /* ---------------------------------------------------- */
13344
13345 /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
13346 (Partial implementation only -- only deal with cases where
13347 the rounding mode is specified directly by the immediate byte.)
13348 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
13349 (Limitations ditto)
13350 */
13351 if (has_66_pfx
13352 && insn[0] == 0x0F && insn[1] == 0x3A
13353 && (insn[2] == 0x0B || insn[2] == 0x0A)) {
13354
13355 Bool isD = insn[2] == 0x0B;
13356 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
13357 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
13358 Int imm = 0;
13359
13360 modrm = insn[3];
13361
13362 if (epartIsReg(modrm)) {
13363 assign( src,
13364 isD ? getXMMRegLane64F( eregOfRM(modrm), 0 )
13365 : getXMMRegLane32F( eregOfRM(modrm), 0 ) );
13366 imm = insn[3+1];
13367 if (imm & ~3) goto decode_failure;
13368 delta += 3+1+1;
13369 DIP( "rounds%c $%d,%s,%s\n",
13370 isD ? 'd' : 's',
13371 imm, nameXMMReg( eregOfRM(modrm) ),
13372 nameXMMReg( gregOfRM(modrm) ) );
13373 } else {
13374 addr = disAMode( &alen, sorb, delta+3, dis_buf );
13375 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
13376 imm = insn[3+alen];
13377 if (imm & ~3) goto decode_failure;
13378 delta += 3+alen+1;
13379 DIP( "roundsd $%d,%s,%s\n",
13380 imm, dis_buf, nameXMMReg( gregOfRM(modrm) ) );
13381 }
13382
13383 /* (imm & 3) contains an Intel-encoded rounding mode. Because
13384 that encoding is the same as the encoding for IRRoundingMode,
13385 we can use that value directly in the IR as a rounding
13386 mode. */
13387 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13388 mkU32(imm & 3), mkexpr(src)) );
13389
13390 if (isD)
13391 putXMMRegLane64F( gregOfRM(modrm), 0, mkexpr(res) );
13392 else
13393 putXMMRegLane32F( gregOfRM(modrm), 0, mkexpr(res) );
13394
13395 goto decode_success;
13396 }
13397
   /* F3 0F BD -- LZCNT (count leading zeroes.  An AMD extension,
      which we can only decode if we're sure this is an AMD cpu that
      supports LZCNT, since otherwise it's BSR, which behaves
      differently. */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xBD
       && 0 != (archinfo->hwcaps & VEX_HWCAPS_X86_LZCNT)) {
      vassert(sz == 2 || sz == 4);
      /*IRType*/ ty  = szToITy(sz);
      IRTemp     src = newTemp(ty);
      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign(src, getIReg(sz, eregOfRM(modrm)));
         delta += 3+1;
         DIP("lzcnt%c %s, %s\n", nameISize(sz),
             nameIReg(sz, eregOfRM(modrm)),
             nameIReg(sz, gregOfRM(modrm)));
      } else {
         addr = disAMode( &alen, sorb, delta+3, dis_buf );
         assign(src, loadLE(ty, mkexpr(addr)));
         delta += 3+alen;
         DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
             nameIReg(sz, gregOfRM(modrm)));
      }

      IRTemp res = gen_LZCNT(ty, src);
      putIReg(sz, gregOfRM(modrm), mkexpr(res));

      // Update flags.  This is pretty lame .. perhaps can do better
      // if this turns out to be performance critical.
      // O S A P are cleared.  Z is set if RESULT == 0.
      // C is set if SRC is zero.
      IRTemp src32 = newTemp(Ity_I32);
      IRTemp res32 = newTemp(Ity_I32);
      assign(src32, widenUto32(mkexpr(src)));
      assign(res32, widenUto32(mkexpr(res)));

      /* Assemble the new OSZACP flag set directly, then install it
         via the OP_COPY thunk scheme below. */
      IRTemp oszacp = newTemp(Ity_I32);
      assign(
         oszacp,
         binop(Iop_Or32,
               binop(Iop_Shl32,
                     unop(Iop_1Uto32,
                          binop(Iop_CmpEQ32, mkexpr(res32), mkU32(0))),
                     mkU8(X86G_CC_SHIFT_Z)),
               binop(Iop_Shl32,
                     unop(Iop_1Uto32,
                          binop(Iop_CmpEQ32, mkexpr(src32), mkU32(0))),
                     mkU8(X86G_CC_SHIFT_C))
         )
      );

      /* OP_COPY means CC_DEP1 already holds the final flag bits. */
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));

      goto decode_success;
   }
13456
13457 /* ---------------------------------------------------- */
13458 /* --- end of the SSE4 decoder --- */
13459 /* ---------------------------------------------------- */
13460
13461 after_sse_decoders:
13462
13463 /* ---------------------------------------------------- */
13464 /* --- deal with misc 0x67 pfxs (addr size override) -- */
13465 /* ---------------------------------------------------- */
13466
   /* 67 E3 = JCXZ (for JECXZ see below) */
   /* The 0x67 address-size prefix makes the counter CX (16 bits)
      rather than ECX.  Emits a conditional side-exit to the branch
      target; fall-through continues in this block. */
   if (insn[0] == 0x67 && insn[1] == 0xE3 && sz == 4) {
      delta += 2;
      /* Target = end of this 3-byte insn + signed 8-bit displacement. */
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
      delta ++;
      stmt( IRStmt_Exit(
               binop(Iop_CmpEQ16, getIReg(2,R_ECX), mkU16(0)),
               Ijk_Boring,
               IRConst_U32(d32),
               OFFB_EIP
            ));
      if (vex_control.strict_block_end) {
         jmp_lit(&dres, Ijk_Boring, ((Addr32)guest_EIP_bbstart)+delta);
      }
      DIP("jcxz 0x%x\n", d32);
      goto decode_success;
   }
13484
13485 /* ---------------------------------------------------- */
13486 /* --- start of the baseline insn decoder -- */
13487 /* ---------------------------------------------------- */
13488
   /* Get the primary opcode. */
   opc = getIByte(delta); delta++;

   /* We get here if the current insn isn't SSE, or this CPU doesn't
      support SSE. */

   /* Big dispatch on the primary opcode byte.  Each case must step
      `delta` past every byte it consumes before breaking out. */
   switch (opc) {
13496
13497 /* ------------------------ Control flow --------------- */
13498
   case 0xC2: /* RET imm16 */
      /* Near return, additionally popping d32 bytes of arguments
         (callee-clears calling conventions). */
      d32 = getUDisp16(delta);
      delta += 2;
      dis_ret(&dres, d32);
      DIP("ret %u\n", d32);
      break;
   case 0xC3: /* RET */
      dis_ret(&dres, 0);
      DIP("ret\n");
      break;

   case 0xCF: /* IRET */
      /* Note, this is an extremely kludgey and limited implementation
         of iret.  All it really does is:
            popl %EIP; popl %CS; popl %EFLAGS.
         %CS is set but ignored (as it is in (eg) popw %cs)". */
      t1 = newTemp(Ity_I32); /* ESP */
      t2 = newTemp(Ity_I32); /* new EIP */
      t3 = newTemp(Ity_I32); /* new CS */
      t4 = newTemp(Ity_I32); /* new EFLAGS */
      assign(t1, getIReg(4,R_ESP));
      assign(t2, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(0) )));
      assign(t3, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(4) )));
      assign(t4, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(8) )));
      /* Get stuff off stack */
      putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(12)));
      /* set %CS (which is ignored anyway) */
      putSReg( R_CS, unop(Iop_32to16, mkexpr(t3)) );
      /* set %EFLAGS */
      set_EFLAGS_from_value( t4, False/*!emit_AC_emwarn*/, 0/*unused*/ );
      /* goto new EIP value */
      jmp_treg(&dres, Ijk_Ret, t2);
      vassert(dres.whatNext == Dis_StopHere);
      DIP("iret (very kludgey)\n");
      break;
13534
   case 0xE8: /* CALL J4 */
      d32 = getUDisp32(delta); delta += 4;
      d32 += (guest_EIP_bbstart+delta); 
      /* (guest_eip_bbstart+delta) == return-to addr, d32 == call-to addr */
      /* Idiom test: a call whose target is the very next insn,
         followed by "popl %reg" (opcodes 0x58..0x5F), is the classic
         get-EIP-into-a-register PIC sequence. */
      Bool isPICIdiom = d32 == guest_EIP_bbstart+delta
                        && getIByte(delta) >= 0x58 
                        && getIByte(delta) <= 0x5F;
      if (isPICIdiom && vex_control.x86_optimize_callpop_idiom) {
         /* Specially treat the position-independent-code idiom 
                 call X
              X: popl %reg
            as 
                 movl %eip, %reg.
            since this generates better code, but for no other reason. */
         Int archReg = getIByte(delta) - 0x58;
         /* vex_printf("-- fPIC thingy\n"); */
         putIReg(4, archReg, mkU32(guest_EIP_bbstart+delta));
         delta++; /* Step over the POP */
         DIP("call 0x%x ; popl %s\n",d32,nameIReg(4,archReg));
      } else {
         /* The normal sequence for a call. */
         t1 = newTemp(Ity_I32);
         assign(t1, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
         putIReg(4, R_ESP, mkexpr(t1));
         storeLE( mkexpr(t1), mkU32(guest_EIP_bbstart+delta));
         if (resteerOkFn( callback_opaque, (Addr32)d32 )) {
            /* follow into the call target. */
            dres.whatNext   = Dis_ResteerU;
            dres.continueAt = (Addr32)d32;
         } else {
            /* NOTE(review): when the PIC idiom is detected but the
               optimisation is disabled, the jump is emitted as
               Ijk_Boring rather than Ijk_Call -- presumably because a
               call-to-next-insn is not a real call; confirm intent. */
            jmp_lit(&dres, isPICIdiom ? Ijk_Boring : Ijk_Call, d32);
            vassert(dres.whatNext == Dis_StopHere);
         }
         DIP("call 0x%x\n",d32);
      }
      break;
13571
13572 //-- case 0xC8: /* ENTER */
13573 //-- d32 = getUDisp16(eip); eip += 2;
13574 //-- abyte = getIByte(delta); delta++;
13575 //--
13576 //-- vg_assert(sz == 4);
13577 //-- vg_assert(abyte == 0);
13578 //--
13579 //-- t1 = newTemp(cb); t2 = newTemp(cb);
13580 //-- uInstr2(cb, GET, sz, ArchReg, R_EBP, TempReg, t1);
13581 //-- uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2);
13582 //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
13583 //-- uLiteral(cb, sz);
13584 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
13585 //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
13586 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP);
13587 //-- if (d32) {
13588 //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
13589 //-- uLiteral(cb, d32);
13590 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
13591 //-- }
13592 //-- DIP("enter 0x%x, 0x%x", d32, abyte);
13593 //-- break;
13594
   case 0xC9: /* LEAVE */
      /* ESP = EBP; EBP = pop(); i.e. unwind the current frame. */
      vassert(sz == 4);
      t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32);
      assign(t1, getIReg(4,R_EBP));
      /* First PUT ESP looks redundant, but need it because ESP must
         always be up-to-date for Memcheck to work... */
      putIReg(4, R_ESP, mkexpr(t1));
      assign(t2, loadLE(Ity_I32,mkexpr(t1)));
      putIReg(4, R_EBP, mkexpr(t2));
      putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(4)) );
      DIP("leave\n");
      break;
13607
13608 /* ---------------- Misc weird-ass insns --------------- */
13609
   case 0x27: /* DAA */
   case 0x2F: /* DAS */
   case 0x37: /* AAA */
   case 0x3F: /* AAS */
      /* An ugly implementation for some ugly instructions.  Oh
         well. */
      /* BCD/ASCII adjust after add/sub.  Delegated to a C helper
         which computes both the new AX and the new flag set. */
      if (sz != 4) goto decode_failure;
      t1 = newTemp(Ity_I32);
      t2 = newTemp(Ity_I32);
      /* Make up a 32-bit value (t1), with the old value of AX in the
         bottom 16 bits, and the old OSZACP bitmask in the upper 16
         bits. */
      assign(t1, 
             binop(Iop_16HLto32,
                   unop(Iop_32to16,
                        mk_x86g_calculate_eflags_all()),
                   getIReg(2, R_EAX)
             ));
      /* Call the helper fn, to get a new AX and OSZACP value, and
         poke both back into the guest state.  Also pass the helper
         the actual opcode so it knows which of the 4 instructions it
         is doing the computation for. */
      vassert(opc == 0x27 || opc == 0x2F || opc == 0x37 || opc == 0x3F);
      assign(t2,
             mkIRExprCCall(
                Ity_I32, 0/*regparm*/, "x86g_calculate_daa_das_aaa_aas",
                &x86g_calculate_daa_das_aaa_aas,
                mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) )
             ));
      putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) ));

      /* New flags live in bits 31:16 of the helper's result; install
         them via the OP_COPY thunk scheme. */
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, 
                        binop(Iop_And32,
                              binop(Iop_Shr32, mkexpr(t2), mkU8(16)),
                              mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P 
                                     | X86G_CC_MASK_A | X86G_CC_MASK_Z 
                                     | X86G_CC_MASK_S| X86G_CC_MASK_O )
                             )
                       )
          );
      /* Set NDEP even though it isn't used.  This makes redundant-PUT
         elimination of previous stores to this field work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
      switch (opc) {
         case 0x27: DIP("daa\n"); break;
         case 0x2F: DIP("das\n"); break;
         case 0x37: DIP("aaa\n"); break;
         case 0x3F: DIP("aas\n"); break;
         default: vassert(0);
      }
      break;

   case 0xD4: /* AAM */
   case 0xD5: /* AAD */
      /* ASCII adjust after multiply/divide.  Same helper-based scheme
         as DAA/DAS/AAA/AAS above.  Only base 10 is supported. */
      d32 = getIByte(delta); delta++;
      if (sz != 4 || d32 != 10) goto decode_failure;
      t1 = newTemp(Ity_I32);
      t2 = newTemp(Ity_I32);
      /* Make up a 32-bit value (t1), with the old value of AX in the
         bottom 16 bits, and the old OSZACP bitmask in the upper 16
         bits. */
      assign(t1, 
             binop(Iop_16HLto32,
                   unop(Iop_32to16,
                        mk_x86g_calculate_eflags_all()),
                   getIReg(2, R_EAX)
             ));
      /* Call the helper fn, to get a new AX and OSZACP value, and
         poke both back into the guest state.  Also pass the helper
         the actual opcode so it knows which of the 2 instructions it
         is doing the computation for. */
      assign(t2,
             mkIRExprCCall(
                Ity_I32, 0/*regparm*/, "x86g_calculate_aad_aam",
                &x86g_calculate_aad_aam,
                mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) )
             ));
      putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) ));

      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, 
                        binop(Iop_And32,
                              binop(Iop_Shr32, mkexpr(t2), mkU8(16)),
                              mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P 
                                     | X86G_CC_MASK_A | X86G_CC_MASK_Z 
                                     | X86G_CC_MASK_S| X86G_CC_MASK_O )
                             )
                       )
          );
      /* Set NDEP even though it isn't used.  This makes
         redundant-PUT elimination of previous stores to this field
         work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

      DIP(opc == 0xD4 ? "aam\n" : "aad\n");
      break;
13709
13710 /* ------------------------ CWD/CDQ -------------------- */
13711
   case 0x98: /* CBW */
      /* Sign-extend within EAX: CWDE (AX -> EAX) at sz==4,
         CBW (AL -> AX) at sz==2. */
      if (sz == 4) {
         putIReg(4, R_EAX, unop(Iop_16Sto32, getIReg(2, R_EAX)));
         DIP("cwde\n");
      } else {
         vassert(sz == 2);
         putIReg(2, R_EAX, unop(Iop_8Sto16, getIReg(1, R_EAX)));
         DIP("cbw\n");
      }
      break;
13722
13723 case 0x99: /* CWD/CDQ */
13724 ty = szToITy(sz);
13725 putIReg(sz, R_EDX,
13726 binop(mkSizedOp(ty,Iop_Sar8),
13727 getIReg(sz, R_EAX),
13728 mkU8(sz == 2 ? 15 : 31)) );
13729 DIP(sz == 2 ? "cwdq\n" : "cdqq\n");
13730 break;
13731
13732 /* ------------------------ FPU ops -------------------- */
13733
   case 0x9E: /* SAHF */
      /* Store AH into the low byte of EFLAGS (SF,ZF,AF,PF,CF). */
      codegen_SAHF();
      DIP("sahf\n");
      break;

   case 0x9F: /* LAHF */
      /* Load the low byte of EFLAGS into AH. */
      codegen_LAHF();
      DIP("lahf\n");
      break;

   case 0x9B: /* FWAIT */
      /* ignore? */
      /* FP exceptions aren't modelled here, so there's nothing to
         wait for; treated as a no-op. */
      DIP("fwait\n");
      break;

   case 0xD8:
   case 0xD9:
   case 0xDA:
   case 0xDB:
   case 0xDC:
   case 0xDD:
   case 0xDE:
   case 0xDF: {
      /* All x87 FPU escape opcodes: hand off to the FPU sub-decoder.
         On failure, restore delta so the failure is reported at the
         right position. */
      Int  delta0    = delta;
      Bool decode_OK = False;
      delta = dis_FPU ( &decode_OK, sorb, delta );
      if (!decode_OK) {
         delta = delta0;
         goto decode_failure;
      }
      break;
   }
13766
13767 /* ------------------------ INC & DEC ------------------ */
13768
   case 0x40: /* INC eAX */
   case 0x41: /* INC eCX */
   case 0x42: /* INC eDX */
   case 0x43: /* INC eBX */
   case 0x44: /* INC eSP */
   case 0x45: /* INC eBP */
   case 0x46: /* INC eSI */
   case 0x47: /* INC eDI */
      /* Register is encoded in the low 3 bits of the opcode.  INC
         updates all flags except CF. */
      vassert(sz == 2 || sz == 4);
      ty = szToITy(sz);
      t1 = newTemp(ty);
      assign( t1, binop(mkSizedOp(ty,Iop_Add8),
                        getIReg(sz, (UInt)(opc - 0x40)),
                        mkU(ty,1)) );
      setFlags_INC_DEC( True, t1, ty );
      putIReg(sz, (UInt)(opc - 0x40), mkexpr(t1));
      DIP("inc%c %s\n", nameISize(sz), nameIReg(sz,opc-0x40));
      break;
13787
13788 case 0x48: /* DEC eAX */
13789 case 0x49: /* DEC eCX */
13790 case 0x4A: /* DEC eDX */
13791 case 0x4B: /* DEC eBX */
13792 case 0x4C: /* DEC eSP */
13793 case 0x4D: /* DEC eBP */
13794 case 0x4E: /* DEC eSI */
13795 case 0x4F: /* DEC eDI */
13796 vassert(sz == 2 || sz == 4);
13797 ty = szToITy(sz);
13798 t1 = newTemp(ty);
13799 assign( t1, binop(mkSizedOp(ty,Iop_Sub8),
13800 getIReg(sz, (UInt)(opc - 0x48)),
13801 mkU(ty,1)) );
13802 setFlags_INC_DEC( False, t1, ty );
13803 putIReg(sz, (UInt)(opc - 0x48), mkexpr(t1));
13804 DIP("dec%c %s\n", nameISize(sz), nameIReg(sz,opc-0x48));
13805 break;
13806
13807 /* ------------------------ INT ------------------------ */
13808
   case 0xCC: /* INT 3 */
      /* Breakpoint: deliver SIGTRAP with the EIP of the next insn. */
      jmp_lit(&dres, Ijk_SigTRAP, ((Addr32)guest_EIP_bbstart)+delta);
      vassert(dres.whatNext == Dis_StopHere);
      DIP("int $0x3\n");
      break;

   case 0xCD: /* INT imm8 */
      d32 = getIByte(delta); delta++;

      /* For any of the cases where we emit a jump (that is, for all
         currently handled cases), it's important that all ArchRegs
         carry their up-to-date value at this point.  So we declare an
         end-of-block here, which forces any TempRegs caching ArchRegs
         to be flushed. */

      /* Handle int $0x3F .. $0x4F by synthesising a segfault and a
         restart of this instruction (hence the "-2" two lines below,
         to get the restart EIP to be this instruction.  This is
         probably Linux-specific and it would be more correct to only
         do this if the VexAbiInfo says that is what we should do.
         This used to handle just 0x40-0x43; Jikes RVM uses a larger
         range (0x3F-0x49), and this allows some slack as well. */
      if (d32 >= 0x3F && d32 <= 0x4F) {
         jmp_lit(&dres, Ijk_SigSEGV, ((Addr32)guest_EIP_bbstart)+delta-2);
         vassert(dres.whatNext == Dis_StopHere);
         DIP("int $0x%x\n", d32);
         break;
      }

      /* Handle int $0x80 (linux syscalls), int $0x81 and $0x82
         (darwin syscalls), int $0x91 (Solaris syscalls) and int $0xD2
         (Solaris fasttrap syscalls).  As part of this, note where we are, so we
         can back up the guest to this point if the syscall needs to
         be restarted. */
      IRJumpKind jump_kind;
      switch (d32) {
         case 0x80:
            jump_kind = Ijk_Sys_int128;
            break;
         case 0x81:
            jump_kind = Ijk_Sys_int129;
            break;
         case 0x82:
            jump_kind = Ijk_Sys_int130;
            break;
         case 0x91:
            jump_kind = Ijk_Sys_int145;
            break;
         case 0xD2:
            jump_kind = Ijk_Sys_int210;
            break;
         default:
            /* none of the above */
            goto decode_failure;
      }

      /* Record the restart address for the syscall machinery, then
         exit the block with the chosen syscall jump kind. */
      stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                        mkU32(guest_EIP_curr_instr) ) );
      jmp_lit(&dres, jump_kind, ((Addr32)guest_EIP_bbstart)+delta);
      vassert(dres.whatNext == Dis_StopHere);
      DIP("int $0x%x\n", d32);
      break;
13871
13872 /* ------------------------ Jcond, byte offset --------- */
13873
   case 0xEB: /* Jb (jump, byte offset) */
      /* Target = end of this 2-byte insn + signed 8-bit displacement. */
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta); 
      delta++;
      if (resteerOkFn( callback_opaque, (Addr32)d32) ) {
         dres.whatNext   = Dis_ResteerU;
         dres.continueAt = (Addr32)d32;
      } else {
         jmp_lit(&dres, Ijk_Boring, d32);
         vassert(dres.whatNext == Dis_StopHere);
      }
      DIP("jmp-8 0x%x\n", d32);
      break;

   case 0xEA: {/* jump far, 16/32 address */
      /* ptr16:16/32 far jump: offset then 16-bit selector.  The
         selector+offset pair is resolved to a linear address via the
         segment-override machinery. */
      vassert(sz == 4 || sz == 2);
      UInt addr_offset = getUDisp(sz, delta);
      delta += sz;
      UInt selector = getUDisp16(delta);
      delta += 2;

      ty = szToITy(sz);
      IRTemp final_addr = newTemp(Ity_I32);
      IRTemp tmp_selector = newTemp(Ity_I32);
      IRTemp tmp_addr_offset = newTemp(ty);
      assign(tmp_selector, mkU32(selector));
      assign(tmp_addr_offset, sz == 4 ? mkU32(addr_offset) : mkU16(addr_offset));
      assign(final_addr, handleSegOverrideAux(tmp_selector, mkexpr(tmp_addr_offset)));

      jmp_treg(&dres, Ijk_Boring, final_addr);
      vassert(dres.whatNext == Dis_StopHere);
      break;
   }
   case 0xE9: /* Jv (jump, 16/32 offset) */
      vassert(sz == 4 || sz == 2);
      d32 = (((Addr32)guest_EIP_bbstart)+delta+sz) + getSDisp(sz,delta); 
      delta += sz;
      if (resteerOkFn( callback_opaque, (Addr32)d32) ) {
         dres.whatNext   = Dis_ResteerU;
         dres.continueAt = (Addr32)d32;
      } else {
         jmp_lit(&dres, Ijk_Boring, d32);
         vassert(dres.whatNext == Dis_StopHere);
      }
      DIP("jmp 0x%x\n", d32);
      break;
13919
   case 0x70: /* JOb (jump overflow) */
   case 0x71: /* JNOb (jump no overflow) */
   case 0x72: /* JBb/JNAEb (jump below) */
   case 0x73: /* JNBb/JAEb (jump not below) */
   case 0x74: /* JZb/JEb (jump zero) */
   case 0x75: /* JNZb/JNEb (jump not zero) */
   case 0x76: /* JBEb/JNAb (jump below or equal) */
   case 0x77: /* JNBEb/JAb (jump not below or equal) */
   case 0x78: /* JSb (jump negative) */
   case 0x79: /* JNSb (jump not negative) */
   case 0x7A: /* JP (jump parity even) */
   case 0x7B: /* JNP/JPO (jump parity odd) */
   case 0x7C: /* JLb/JNGEb (jump less) */
   case 0x7D: /* JGEb/JNLb (jump greater or equal) */
   case 0x7E: /* JLEb/JNGb (jump less or equal) */
   case 0x7F: /* JGb/JNLEb (jump greater) */
    { /* The X86Condcode is (opc - 0x70); XORing it with 1 negates
         the condition. */
      Int jmpDelta;
      const HChar* comment  = "";
      jmpDelta = (Int)getSDisp8(delta);
      vassert(-128 <= jmpDelta && jmpDelta < 128);
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + jmpDelta; 
      delta++;
      if (resteerCisOk
          && vex_control.guest_chase_cond
          && (Addr32)d32 != (Addr32)guest_EIP_bbstart
          && jmpDelta < 0
          && resteerOkFn( callback_opaque, (Addr32)d32) ) {
         /* Speculation: assume this backward branch is taken.  So we
            need to emit a side-exit to the insn following this one,
            on the negation of the condition, and continue at the
            branch target address (d32).  If we wind up back at the
            first instruction of the trace, just stop; it's better to
            let the IR loop unroller handle that case. */
         stmt( IRStmt_Exit( 
                  mk_x86g_calculate_condition((X86Condcode)(1 ^ (opc - 0x70))),
                  Ijk_Boring,
                  IRConst_U32(guest_EIP_bbstart+delta),
                  OFFB_EIP ) );
         dres.whatNext   = Dis_ResteerC;
         dres.continueAt = (Addr32)d32;
         comment = "(assumed taken)";
      }
      else
      if (resteerCisOk
          && vex_control.guest_chase_cond
          && (Addr32)d32 != (Addr32)guest_EIP_bbstart
          && jmpDelta >= 0
          && resteerOkFn( callback_opaque, 
                          (Addr32)(guest_EIP_bbstart+delta)) ) {
         /* Speculation: assume this forward branch is not taken.  So
            we need to emit a side-exit to d32 (the dest) and continue
            disassembling at the insn immediately following this
            one. */
         stmt( IRStmt_Exit( 
                  mk_x86g_calculate_condition((X86Condcode)(opc - 0x70)),
                  Ijk_Boring,
                  IRConst_U32(d32),
                  OFFB_EIP ) );
         dres.whatNext   = Dis_ResteerC;
         dres.continueAt = guest_EIP_bbstart + delta;
         comment = "(assumed not taken)";
      }
      else {
         /* Conservative default translation - end the block at this
            point. */
         jcc_01( &dres, (X86Condcode)(opc - 0x70), 
                 (Addr32)(guest_EIP_bbstart+delta), d32);
         vassert(dres.whatNext == Dis_StopHere);
      }
      DIP("j%s-8 0x%x %s\n", name_X86Condcode(opc - 0x70), d32, comment);
      break;
    }
13992
   case 0xE3: /* JECXZ (for JCXZ see above) */
      /* Side-exit to the target if ECX == 0; fall through otherwise. */
      if (sz != 4) goto decode_failure;
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
      delta ++;
      stmt( IRStmt_Exit(
               binop(Iop_CmpEQ32, getIReg(4,R_ECX), mkU32(0)),
               Ijk_Boring,
               IRConst_U32(d32),
               OFFB_EIP
            ));
      if (vex_control.strict_block_end) {
         jmp_lit(&dres, Ijk_Boring, ((Addr32)guest_EIP_bbstart)+delta);
      }
      DIP("jecxz 0x%x\n", d32);
      break;

   case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
   case 0xE1: /* LOOPE  disp8: decrement count, jump if count != 0 && ZF==1 */
   case 0xE2: /* LOOP   disp8: decrement count, jump if count != 0 */
    { /* Again, the docs say this uses ECX/CX as a count depending on
         the address size override, not the operand one.  Since we
         don't handle address size overrides, I guess that means
         ECX. */
      IRExpr* zbit  = NULL;
      IRExpr* count = NULL;
      IRExpr* cond  = NULL;
      const HChar* xtra = NULL;

      if (sz != 4) goto decode_failure;
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
      delta++;
      /* Decrement ECX first; the branch condition tests the
         post-decrement value. */
      putIReg(4, R_ECX, binop(Iop_Sub32, getIReg(4,R_ECX), mkU32(1)));

      count = getIReg(4,R_ECX);
      cond = binop(Iop_CmpNE32, count, mkU32(0));
      switch (opc) {
         case 0xE2: 
            xtra = ""; 
            break;
         case 0xE1: 
            xtra = "e"; 
            zbit = mk_x86g_calculate_condition( X86CondZ );
            cond = mkAnd1(cond, zbit);
            break;
         case 0xE0: 
            xtra = "ne";
            zbit = mk_x86g_calculate_condition( X86CondNZ );
            cond = mkAnd1(cond, zbit);
            break;
         default:
            vassert(0);
      }
      stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U32(d32), OFFB_EIP) );

      if (vex_control.strict_block_end) {
         jmp_lit(&dres, Ijk_Boring, ((Addr32)guest_EIP_bbstart)+delta);
      }

      DIP("loop%s 0x%x\n", xtra, d32);
      break;
    }
14054
14055 /* ------------------------ IMUL ----------------------- */
14056
   case 0x69: /* IMUL Iv, Ev, Gv */
      /* Three-operand IMUL with an operand-sized immediate. */
      delta = dis_imul_I_E_G ( sorb, sz, delta, sz );
      break;
   case 0x6B: /* IMUL Ib, Ev, Gv */
      /* Three-operand IMUL with a (sign-extended) byte immediate. */
      delta = dis_imul_I_E_G ( sorb, sz, delta, 1 );
      break;
14063
14064 /* ------------------------ MOV ------------------------ */
14065
   case 0x88: /* MOV Gb,Eb */
      delta = dis_mov_G_E(sorb, 1, delta);
      break;

   case 0x89: /* MOV Gv,Ev */
      delta = dis_mov_G_E(sorb, sz, delta);
      break;

   case 0x8A: /* MOV Eb,Gb */
      delta = dis_mov_E_G(sorb, 1, delta);
      break;
 
   case 0x8B: /* MOV Ev,Gv */
      delta = dis_mov_E_G(sorb, sz, delta);
      break;
 
   case 0x8D: /* LEA M,Gv */
      /* Load effective address: the computed address itself, not a
         load, goes into the G register.  E must be memory. */
      if (sz != 4)
         goto decode_failure;
      modrm = getIByte(delta);
      if (epartIsReg(modrm)) 
         goto decode_failure;
      /* NOTE!  this is the one place where a segment override prefix
         has no effect on the address calculation.  Therefore we pass
         zero instead of sorb here. */
      addr = disAMode ( &alen, /*sorb*/ 0, delta, dis_buf );
      delta += alen;
      putIReg(sz, gregOfRM(modrm), mkexpr(addr));
      DIP("lea%c %s, %s\n", nameISize(sz), dis_buf, 
                            nameIReg(sz,gregOfRM(modrm)));
      break;

   case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */
      delta = dis_mov_Sw_Ew(sorb, sz, delta);
      break;

   case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */
      delta = dis_mov_Ew_Sw(sorb, delta);
      break;
14105
   case 0xA0: /* MOV Ob,AL */
      sz = 1;
      /* Fall through ... */
   case 0xA1: /* MOV Ov,eAX */
      /* Load AL/AX/EAX from an absolute offset (moffs).  The offset
         is always 32 bits, regardless of operand size. */
      d32 = getUDisp32(delta); delta += 4;
      ty = szToITy(sz);
      addr = newTemp(Ity_I32);
      assign( addr, handleSegOverride(sorb, mkU32(d32)) );
      putIReg(sz, R_EAX, loadLE(ty, mkexpr(addr)));
      DIP("mov%c %s0x%x, %s\n", nameISize(sz), sorbTxt(sorb),
                                d32, nameIReg(sz,R_EAX));
      break;

   case 0xA2: /* MOV Ob,AL */
      sz = 1;
      /* Fall through ... */
   case 0xA3: /* MOV eAX,Ov */
      /* Store AL/AX/EAX to an absolute offset (moffs). */
      d32 = getUDisp32(delta); delta += 4;
      ty = szToITy(sz);
      addr = newTemp(Ity_I32);
      assign( addr, handleSegOverride(sorb, mkU32(d32)) );
      storeLE( mkexpr(addr), getIReg(sz,R_EAX) );
      DIP("mov%c %s, %s0x%x\n", nameISize(sz), nameIReg(sz,R_EAX),
                                sorbTxt(sorb), d32);
      break;

   case 0xB0: /* MOV imm,AL */
   case 0xB1: /* MOV imm,CL */
   case 0xB2: /* MOV imm,DL */
   case 0xB3: /* MOV imm,BL */
   case 0xB4: /* MOV imm,AH */
   case 0xB5: /* MOV imm,CH */
   case 0xB6: /* MOV imm,DH */
   case 0xB7: /* MOV imm,BH */
      /* Byte register is encoded in the low 3 bits of the opcode. */
      d32 = getIByte(delta); delta += 1;
      putIReg(1, opc-0xB0, mkU8(d32));
      DIP("movb $0x%x,%s\n", d32, nameIReg(1,opc-0xB0));
      break;

   case 0xB8: /* MOV imm,eAX */
   case 0xB9: /* MOV imm,eCX */
   case 0xBA: /* MOV imm,eDX */
   case 0xBB: /* MOV imm,eBX */
   case 0xBC: /* MOV imm,eSP */
   case 0xBD: /* MOV imm,eBP */
   case 0xBE: /* MOV imm,eSI */
   case 0xBF: /* MOV imm,eDI */
      /* Word/doubleword register encoded in the low 3 bits. */
      d32 = getUDisp(sz,delta); delta += sz;
      putIReg(sz, opc-0xB8, mkU(szToITy(sz), d32));
      DIP("mov%c $0x%x,%s\n", nameISize(sz), d32, nameIReg(sz,opc-0xB8));
      break;

   case 0xC6: /* C6 /0 = MOV Ib,Eb */
      sz = 1;
      goto maybe_do_Mov_I_E;
   case 0xC7: /* C7 /0 = MOV Iv,Ev */
      goto maybe_do_Mov_I_E;

   maybe_do_Mov_I_E:
      /* Only reg-field /0 is MOV; any other reg field is undefined
         for C6/C7, hence the decode_failure below. */
      modrm = getIByte(delta);
      if (gregOfRM(modrm) == 0) {
         if (epartIsReg(modrm)) {
            delta++; /* mod/rm byte */
            d32 = getUDisp(sz,delta); delta += sz;
            putIReg(sz, eregOfRM(modrm), mkU(szToITy(sz), d32));
            DIP("mov%c $0x%x, %s\n", nameISize(sz), d32, 
                                     nameIReg(sz,eregOfRM(modrm)));
         } else {
            addr = disAMode ( &alen, sorb, delta, dis_buf );
            delta += alen;
            d32 = getUDisp(sz,delta); delta += sz;
            storeLE(mkexpr(addr), mkU(szToITy(sz), d32));
            DIP("mov%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
         }
         break;
      }
      goto decode_failure;
14183
   /* ------------------------ opl imm, A ----------------- */
   /* ALU ops with an immediate and the accumulator (AL/AX/EAX).
      The 8-bit IROp passed here is the "base" op; dis_op_imm_A
      widens it to the actual operand size internally.  The Bool
      arguments are (carry-in?, keep-result?) — keep==False gives
      the flags-only forms CMP and TEST. */

      case 0x04: /* ADD Ib, AL */
         delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
         break;
      case 0x05: /* ADD Iv, eAX */
         delta = dis_op_imm_A( sz, False, Iop_Add8, True, delta, "add" );
         break;

      case 0x0C: /* OR Ib, AL */
         delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
         break;
      case 0x0D: /* OR Iv, eAX */
         delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
         break;

      case 0x14: /* ADC Ib, AL */
         delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
         break;
      case 0x15: /* ADC Iv, eAX */
         delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" );
         break;

      case 0x1C: /* SBB Ib, AL */
         delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
         break;
      case 0x1D: /* SBB Iv, eAX */
         delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" );
         break;

      case 0x24: /* AND Ib, AL */
         delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
         break;
      case 0x25: /* AND Iv, eAX */
         delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
         break;

      case 0x2C: /* SUB Ib, AL */
         delta = dis_op_imm_A( 1, False, Iop_Sub8, True, delta, "sub" );
         break;
      case 0x2D: /* SUB Iv, eAX */
         delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
         break;

      case 0x34: /* XOR Ib, AL */
         delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
         break;
      case 0x35: /* XOR Iv, eAX */
         delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
         break;

      case 0x3C: /* CMP Ib, AL */
         delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
         break;
      case 0x3D: /* CMP Iv, eAX */
         delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
         break;

      case 0xA8: /* TEST Ib, AL */
         delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
         break;
      case 0xA9: /* TEST Iv, eAX */
         delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
         break;
14248
   /* ------------------------ opl Ev, Gv ----------------- */
   /* Two-operand ALU ops, E (reg-or-mem) source, G (reg) destination.
      As above, the 8-bit IROp is a size-generic base op, and
      keep==False gives the flags-only CMP/TEST forms. */

      case 0x02: /* ADD Eb,Gb */
         delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, 1, delta, "add" );
         break;
      case 0x03: /* ADD Ev,Gv */
         delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, sz, delta, "add" );
         break;

      case 0x0A: /* OR Eb,Gb */
         delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, 1, delta, "or" );
         break;
      case 0x0B: /* OR Ev,Gv */
         delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, sz, delta, "or" );
         break;

      case 0x12: /* ADC Eb,Gb */
         delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, 1, delta, "adc" );
         break;
      case 0x13: /* ADC Ev,Gv */
         delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, sz, delta, "adc" );
         break;

      case 0x1A: /* SBB Eb,Gb */
         delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, 1, delta, "sbb" );
         break;
      case 0x1B: /* SBB Ev,Gv */
         delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, sz, delta, "sbb" );
         break;

      case 0x22: /* AND Eb,Gb */
         delta = dis_op2_E_G ( sorb, False, Iop_And8, True, 1, delta, "and" );
         break;
      case 0x23: /* AND Ev,Gv */
         delta = dis_op2_E_G ( sorb, False, Iop_And8, True, sz, delta, "and" );
         break;

      case 0x2A: /* SUB Eb,Gb */
         delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, 1, delta, "sub" );
         break;
      case 0x2B: /* SUB Ev,Gv */
         delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, sz, delta, "sub" );
         break;

      case 0x32: /* XOR Eb,Gb */
         delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, 1, delta, "xor" );
         break;
      case 0x33: /* XOR Ev,Gv */
         delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, sz, delta, "xor" );
         break;

      case 0x3A: /* CMP Eb,Gb */
         delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, 1, delta, "cmp" );
         break;
      case 0x3B: /* CMP Ev,Gv */
         delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, sz, delta, "cmp" );
         break;

      case 0x84: /* TEST Eb,Gb */
         delta = dis_op2_E_G ( sorb, False, Iop_And8, False, 1, delta, "test" );
         break;
      case 0x85: /* TEST Ev,Gv */
         delta = dis_op2_E_G ( sorb, False, Iop_And8, False, sz, delta, "test" );
         break;

   /* ------------------------ opl Gv, Ev ----------------- */
   /* Same ops with G (reg) source and E (reg-or-mem) destination.
      Only this direction can have a memory destination, hence only
      these take pfx_lock (for LOCK-prefixed atomic forms). */

      case 0x00: /* ADD Gb,Eb */
         delta = dis_op2_G_E ( sorb, pfx_lock, False,
                               Iop_Add8, True, 1, delta, "add" );
         break;
      case 0x01: /* ADD Gv,Ev */
         delta = dis_op2_G_E ( sorb, pfx_lock, False,
                               Iop_Add8, True, sz, delta, "add" );
         break;

      case 0x08: /* OR Gb,Eb */
         delta = dis_op2_G_E ( sorb, pfx_lock, False,
                               Iop_Or8, True, 1, delta, "or" );
         break;
      case 0x09: /* OR Gv,Ev */
         delta = dis_op2_G_E ( sorb, pfx_lock, False,
                               Iop_Or8, True, sz, delta, "or" );
         break;

      case 0x10: /* ADC Gb,Eb */
         delta = dis_op2_G_E ( sorb, pfx_lock, True,
                               Iop_Add8, True, 1, delta, "adc" );
         break;
      case 0x11: /* ADC Gv,Ev */
         delta = dis_op2_G_E ( sorb, pfx_lock, True,
                               Iop_Add8, True, sz, delta, "adc" );
         break;

      case 0x18: /* SBB Gb,Eb */
         delta = dis_op2_G_E ( sorb, pfx_lock, True,
                               Iop_Sub8, True, 1, delta, "sbb" );
         break;
      case 0x19: /* SBB Gv,Ev */
         delta = dis_op2_G_E ( sorb, pfx_lock, True,
                               Iop_Sub8, True, sz, delta, "sbb" );
         break;

      case 0x20: /* AND Gb,Eb */
         delta = dis_op2_G_E ( sorb, pfx_lock, False,
                               Iop_And8, True, 1, delta, "and" );
         break;
      case 0x21: /* AND Gv,Ev */
         delta = dis_op2_G_E ( sorb, pfx_lock, False,
                               Iop_And8, True, sz, delta, "and" );
         break;

      case 0x28: /* SUB Gb,Eb */
         delta = dis_op2_G_E ( sorb, pfx_lock, False,
                               Iop_Sub8, True, 1, delta, "sub" );
         break;
      case 0x29: /* SUB Gv,Ev */
         delta = dis_op2_G_E ( sorb, pfx_lock, False,
                               Iop_Sub8, True, sz, delta, "sub" );
         break;

      case 0x30: /* XOR Gb,Eb */
         delta = dis_op2_G_E ( sorb, pfx_lock, False,
                               Iop_Xor8, True, 1, delta, "xor" );
         break;
      case 0x31: /* XOR Gv,Ev */
         delta = dis_op2_G_E ( sorb, pfx_lock, False,
                               Iop_Xor8, True, sz, delta, "xor" );
         break;

      case 0x38: /* CMP Gb,Eb */
         delta = dis_op2_G_E ( sorb, pfx_lock, False,
                               Iop_Sub8, False, 1, delta, "cmp" );
         break;
      case 0x39: /* CMP Gv,Ev */
         delta = dis_op2_G_E ( sorb, pfx_lock, False,
                               Iop_Sub8, False, sz, delta, "cmp" );
         break;
14387
   /* ------------------------ POP ------------------------ */

      case 0x58: /* POP eAX */
      case 0x59: /* POP eCX */
      case 0x5A: /* POP eDX */
      case 0x5B: /* POP eBX */
      case 0x5D: /* POP eBP */
      case 0x5E: /* POP eSI */
      case 0x5F: /* POP eDI */
      case 0x5C: /* POP eSP */
         /* Load from [ESP] into a temp first, bump ESP, then write the
            register — so "pop %esp" gets the loaded value, not the
            incremented ESP. */
         vassert(sz == 2 || sz == 4);
         t1 = newTemp(szToITy(sz)); t2 = newTemp(Ity_I32);
         assign(t2, getIReg(4, R_ESP));
         assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
         putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz)));
         putIReg(sz, opc-0x58, mkexpr(t1));
         DIP("pop%c %s\n", nameISize(sz), nameIReg(sz,opc-0x58));
         break;

      case 0x9D: /* POPF */
         vassert(sz == 2 || sz == 4);
         t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32);
         assign(t2, getIReg(4, R_ESP));
         assign(t1, widenUto32(loadLE(szToITy(sz),mkexpr(t2))));
         putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz)));

         /* Generate IR to set %EFLAGS{O,S,Z,A,C,P,D,ID,AC} from the
            value in t1. */
         set_EFLAGS_from_value( t1, True/*emit_AC_emwarn*/,
                                ((Addr32)guest_EIP_bbstart)+delta );

         DIP("popf%c\n", nameISize(sz));
         break;

      case 0x61: /* POPA */
         /* This is almost certainly wrong for sz==2.  So ... */
         if (sz != 4) goto decode_failure;

         /* t5 is the old %ESP value. */
         t5 = newTemp(Ity_I32);
         assign( t5, getIReg(4, R_ESP) );

         /* Reload all the registers, except %esp.  Offsets follow the
            PUSHA layout: EDI at +0 up to EAX at +28; the saved ESP at
            +12 is architecturally discarded. */
         putIReg(4,R_EAX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(28)) ));
         putIReg(4,R_ECX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(24)) ));
         putIReg(4,R_EDX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(20)) ));
         putIReg(4,R_EBX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(16)) ));
         /* ignore saved %ESP */
         putIReg(4,R_EBP, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 8)) ));
         putIReg(4,R_ESI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 4)) ));
         putIReg(4,R_EDI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 0)) ));

         /* and move %ESP back up */
         putIReg( 4, R_ESP, binop(Iop_Add32, mkexpr(t5), mkU32(8*4)) );

         DIP("popa%c\n", nameISize(sz));
         break;

      case 0x8F: /* POPL/POPW m32 */
        { Int len;
          UChar rm = getIByte(delta);

          /* make sure this instruction is correct POP */
          if (epartIsReg(rm) || gregOfRM(rm) != 0)
             goto decode_failure;
          /* and has correct size */
          if (sz != 4 && sz != 2)
             goto decode_failure;
          ty = szToITy(sz);

          t1 = newTemp(Ity_I32); /* stack address */
          t3 = newTemp(ty); /* data */
          /* set t1 to ESP: t1 = ESP */
          assign( t1, getIReg(4, R_ESP) );
          /* load M[ESP] to virtual register t3: t3 = M[t1] */
          assign( t3, loadLE(ty, mkexpr(t1)) );
          
          /* increase ESP; must be done before the STORE.  Intel manual says:
                If the ESP register is used as a base register for addressing
                a destination operand in memory, the POP instruction computes
                the effective address of the operand after it increments the
                ESP register.
          */
          putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(sz)) );

          /* resolve MODR/M */
          addr = disAMode ( &len, sorb, delta, dis_buf);

          storeLE( mkexpr(addr), mkexpr(t3) );

          DIP("pop%c %s\n", sz==2 ? 'w' : 'l', dis_buf);

          delta += len;
          break;
        }

      case 0x1F: /* POP %DS */
         dis_pop_segreg( R_DS, sz ); break;
      case 0x07: /* POP %ES */
         dis_pop_segreg( R_ES, sz ); break;
      case 0x17: /* POP %SS */
         dis_pop_segreg( R_SS, sz ); break;
14489
   /* ------------------------ PUSH ----------------------- */

      case 0x50: /* PUSH eAX */
      case 0x51: /* PUSH eCX */
      case 0x52: /* PUSH eDX */
      case 0x53: /* PUSH eBX */
      case 0x55: /* PUSH eBP */
      case 0x56: /* PUSH eSI */
      case 0x57: /* PUSH eDI */
      case 0x54: /* PUSH eSP */
         /* This is the Right Way, in that the value to be pushed is
            established before %esp is changed, so that pushl %esp
            correctly pushes the old value. */
         vassert(sz == 2 || sz == 4);
         ty = sz==2 ? Ity_I16 : Ity_I32;
         t1 = newTemp(ty); t2 = newTemp(Ity_I32);
         assign(t1, getIReg(sz, opc-0x50));
         assign(t2, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)));
         putIReg(4, R_ESP, mkexpr(t2) );
         storeLE(mkexpr(t2),mkexpr(t1));
         DIP("push%c %s\n", nameISize(sz), nameIReg(sz,opc-0x50));
         break;


      case 0x68: /* PUSH Iv */
         d32 = getUDisp(sz,delta); delta += sz;
         goto do_push_I;
      case 0x6A: /* PUSH Ib, sign-extended to sz */
         d32 = getSDisp8(delta); delta += 1;
         goto do_push_I;
      do_push_I:
         ty = szToITy(sz);
         t1 = newTemp(Ity_I32); t2 = newTemp(ty);
         assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
         putIReg(4, R_ESP, mkexpr(t1) );
         /* stop mkU16 asserting if d32 is a negative 16-bit number
            (bug #132813) */
         if (ty == Ity_I16)
            d32 &= 0xFFFF;
         storeLE( mkexpr(t1), mkU(ty,d32) );
         DIP("push%c $0x%x\n", nameISize(sz), d32);
         break;

      case 0x9C: /* PUSHF */ {
         vassert(sz == 2 || sz == 4);

         t1 = newTemp(Ity_I32);
         assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
         putIReg(4, R_ESP, mkexpr(t1) );

         /* Calculate OSZACP, and patch in fixed fields as per
            Intel docs. 
            - bit 1 is always 1
            - bit 9 is Interrupt Enable (should always be 1 in user mode?)
         */
         t2 = newTemp(Ity_I32);
         assign( t2, binop(Iop_Or32, 
                           mk_x86g_calculate_eflags_all(), 
                           mkU32( (1<<1)|(1<<9) ) ));

         /* Patch in the D flag.  This can simply be a copy of bit 10 of
            baseBlock[OFFB_DFLAG]. */
         t3 = newTemp(Ity_I32);
         assign( t3, binop(Iop_Or32,
                           mkexpr(t2),
                           binop(Iop_And32, 
                                 IRExpr_Get(OFFB_DFLAG,Ity_I32),
                                 mkU32(1<<10))) 
               );

         /* And patch in the ID flag.  The guest-state IDFLAG field is
            0 or 1, so shift it up to EFLAGS bit 21. */
         t4 = newTemp(Ity_I32);
         assign( t4, binop(Iop_Or32,
                           mkexpr(t3),
                           binop(Iop_And32,
                                 binop(Iop_Shl32, IRExpr_Get(OFFB_IDFLAG,Ity_I32), 
                                                  mkU8(21)),
                                 mkU32(1<<21)))
               );

         /* And patch in the AC flag (guest-state 0/1, shifted up to
            EFLAGS bit 18). */
         t5 = newTemp(Ity_I32);
         assign( t5, binop(Iop_Or32,
                           mkexpr(t4),
                           binop(Iop_And32,
                                 binop(Iop_Shl32, IRExpr_Get(OFFB_ACFLAG,Ity_I32), 
                                                  mkU8(18)),
                                 mkU32(1<<18)))
               );

         /* if sz==2, the stored value needs to be narrowed. */
         if (sz == 2)
           storeLE( mkexpr(t1), unop(Iop_32to16,mkexpr(t5)) );
         else 
           storeLE( mkexpr(t1), mkexpr(t5) );

         DIP("pushf%c\n", nameISize(sz));
         break;
      }

      case 0x60: /* PUSHA */
         /* This is almost certainly wrong for sz==2.  So ... */
         if (sz != 4) goto decode_failure;

         /* This is the Right Way, in that the value to be pushed is
            established before %esp is changed, so that pusha
            correctly pushes the old %esp value.  New value of %esp is
            pushed at start. */
         /* t0 is the %ESP value we're going to push. */
         t0 = newTemp(Ity_I32);
         assign( t0, getIReg(4, R_ESP) );

         /* t5 will be the new %ESP value. */
         t5 = newTemp(Ity_I32);
         assign( t5, binop(Iop_Sub32, mkexpr(t0), mkU32(8*4)) );

         /* Update guest state before prodding memory. */
         putIReg(4, R_ESP, mkexpr(t5));

         /* Dump all the registers.  Layout mirrors POPA above:
            EDI at +0 up to EAX at +28, old ESP at +12. */
         storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(28)), getIReg(4,R_EAX) );
         storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(24)), getIReg(4,R_ECX) );
         storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(20)), getIReg(4,R_EDX) );
         storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(16)), getIReg(4,R_EBX) );
         storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(12)), mkexpr(t0) /*esp*/);
         storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 8)), getIReg(4,R_EBP) );
         storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 4)), getIReg(4,R_ESI) );
         storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 0)), getIReg(4,R_EDI) );

         DIP("pusha%c\n", nameISize(sz));
         break;

      case 0x0E: /* PUSH %CS */
         dis_push_segreg( R_CS, sz ); break;
      case 0x1E: /* PUSH %DS */
         dis_push_segreg( R_DS, sz ); break;
      case 0x06: /* PUSH %ES */
         dis_push_segreg( R_ES, sz ); break;
      case 0x16: /* PUSH %SS */
         dis_push_segreg( R_SS, sz ); break;
14630
   /* ------------------------ SCAS et al ----------------- */
   /* Un-REPed string instructions.  The even opcode of each pair is
      the byte form (size 1); the odd one uses the operand size sz. */

      case 0xA4: /* MOVS, no REP prefix */
      case 0xA5: 
         if (sorb != 0)
            goto decode_failure; /* else dis_string_op asserts */
         dis_string_op( dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb );
         break;

      case 0xA6: /* CMPSb, no REP prefix */
      case 0xA7:
         if (sorb != 0)
            goto decode_failure; /* else dis_string_op asserts */
         dis_string_op( dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb );
         break;

      case 0xAA: /* STOS, no REP prefix */
      case 0xAB:
         if (sorb != 0)
            goto decode_failure; /* else dis_string_op asserts */
         dis_string_op( dis_STOS, ( opc == 0xAA ? 1 : sz ), "stos", sorb );
         break;

      case 0xAC: /* LODS, no REP prefix */
      case 0xAD:
         if (sorb != 0)
            goto decode_failure; /* else dis_string_op asserts */
         dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", sorb );
         break;

      case 0xAE: /* SCAS, no REP prefix */
      case 0xAF:
         if (sorb != 0) 
            goto decode_failure; /* else dis_string_op asserts */
         dis_string_op( dis_SCAS, ( opc == 0xAE ? 1 : sz ), "scas", sorb );
         break;


      case 0xFC: /* CLD */
         /* DFLAG is kept as +1/-1 (the memory-stride direction), not
            as the raw EFLAGS.DF bit. */
         stmt( IRStmt_Put( OFFB_DFLAG, mkU32(1)) );
         DIP("cld\n");
         break;

      case 0xFD: /* STD */
         stmt( IRStmt_Put( OFFB_DFLAG, mkU32(0xFFFFFFFF)) );
         DIP("std\n");
         break;

      case 0xF8: /* CLC */
      case 0xF9: /* STC */
      case 0xF5: /* CMC */
         /* Materialise all the flags, modify just C, then reinstall the
            whole lot via the COPY thunk. */
         t0 = newTemp(Ity_I32);
         t1 = newTemp(Ity_I32);
         assign( t0, mk_x86g_calculate_eflags_all() );
         switch (opc) {
            case 0xF8: 
               assign( t1, binop(Iop_And32, mkexpr(t0), 
                                            mkU32(~X86G_CC_MASK_C)));
               DIP("clc\n");
               break;
            case 0xF9: 
               assign( t1, binop(Iop_Or32, mkexpr(t0), 
                                           mkU32(X86G_CC_MASK_C)));
               DIP("stc\n");
               break;
            case 0xF5: 
               assign( t1, binop(Iop_Xor32, mkexpr(t0), 
                                            mkU32(X86G_CC_MASK_C)));
               DIP("cmc\n");
               break;
            default: 
               vpanic("disInstr(x86)(clc/stc/cmc)");
         }
         stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
         stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t1) ));
         /* Set NDEP even though it isn't used.  This makes redundant-PUT
            elimination of previous stores to this field work better. */
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
         break;

      case 0xD6: /* SALC */
         /* AL := CF ? 0xFF : 0x00, done by sign-propagating the C bit
            across 32 bits (shl 31 then sar 31) and narrowing to 8. */
         t0 = newTemp(Ity_I32);
         t1 = newTemp(Ity_I32);
         assign( t0,  binop(Iop_And32,
                            mk_x86g_calculate_eflags_c(),
                            mkU32(1)) );
         assign( t1, binop(Iop_Sar32, 
                           binop(Iop_Shl32, mkexpr(t0), mkU8(31)), 
                           mkU8(31)) );
         putIReg(1, R_EAX, unop(Iop_32to8, mkexpr(t1)) );
         DIP("salc\n");
         break;
14724
      /* REPNE prefix insn */
      case 0xF2: { 
         Addr32 eip_orig = guest_EIP_bbstart + delta_start;
         if (sorb != 0) goto decode_failure;
         abyte = getIByte(delta); delta++;

         if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }

         switch (abyte) {
         /* According to the Intel manual, "repne movs" should never occur, but
          * in practice it has happened, so allow for it here... */
         case 0xA4: sz = 1;   /* REPNE MOVS<sz> */
                              /* fall through — byte form shares the code */
         case 0xA5: 
            dis_REP_op ( &dres, X86CondNZ, dis_MOVS, sz, eip_orig,
                                guest_EIP_bbstart+delta, "repne movs" );
            break;

         case 0xA6: sz = 1;   /* REPNE CMP<sz> */
                              /* fall through */
         case 0xA7:
            dis_REP_op ( &dres, X86CondNZ, dis_CMPS, sz, eip_orig, 
                                guest_EIP_bbstart+delta, "repne cmps" );
            break;

         case 0xAA: sz = 1;   /* REPNE STOS<sz> */
                              /* fall through */
         case 0xAB:
            dis_REP_op ( &dres, X86CondNZ, dis_STOS, sz, eip_orig, 
                                guest_EIP_bbstart+delta, "repne stos" );
            break;

         case 0xAE: sz = 1;   /* REPNE SCAS<sz> */
                              /* fall through */
         case 0xAF:
            dis_REP_op ( &dres, X86CondNZ, dis_SCAS, sz, eip_orig,
                                guest_EIP_bbstart+delta, "repne scas" );
            break;

         case 0xC3:           /* REPNE RET, used to help out AMD cpus */
            /* identical to normal RET — the F2 prefix is a branch-
               predictor hint, not a semantic change */
            dis_ret(&dres, 0);
            DIP("repne ret\n");
            break;

         case 0x70: case 0x71: case 0x72: case 0x73:
         case 0x74: case 0x75: case 0x76: case 0x77:
         case 0x78: case 0x79: case 0x7A: case 0x7B:
         case 0x7C: case 0x7D: case 0x7E: case 0x7F:
            /* Jump instructions, same reason as RET */
          { Int jmpDelta;
            jmpDelta = (Int)getSDisp8(delta);
            vassert(-128 <= jmpDelta && jmpDelta < 128);
            d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + jmpDelta;
            delta++;
            jcc_01( &dres, (X86Condcode)(abyte - 0x70), 
                    (Addr32)(guest_EIP_bbstart+delta), d32);
            vassert(dres.whatNext == Dis_StopHere);
          }
            DIP("repne j%s-8 0x%x\n", name_X86Condcode(abyte - 0x70), d32);
            break;

         case 0xE9:           /* Jv (jump, 16/32 offset) */
            vassert(sz == 4 || sz == 2);
            d32 = (((Addr32)guest_EIP_bbstart)+delta+sz) + getSDisp(sz,delta); 
            delta += sz;
            jmp_lit(&dres, Ijk_Boring, d32);
            vassert(dres.whatNext == Dis_StopHere);
            DIP("repne jmp 0x%x\n", d32);
            break;

         default:
            goto decode_failure;
         }
         break;
      }

      /* REP/REPE prefix insn (for SCAS and CMPS, 0xF3 means REPE,
         for the rest, it means REP) */
      case 0xF3: { 
         Addr32 eip_orig = guest_EIP_bbstart + delta_start;
         abyte = getIByte(delta); delta++;

         if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }

         /* A segment override is only tolerated ahead of the 0x0F
            forms below (TZCNT/LZCNT/ENDBR encodings). */
         if (sorb != 0 && abyte != 0x0F) goto decode_failure;

         switch (abyte) {
         case 0x0F:
            switch (getIByte(delta)) {
            /* On older CPUs, TZCNT behaves the same as BSF. */
            case 0xBC: /* REP BSF Gv,Ev */
               delta = dis_bs_E_G ( sorb, sz, delta + 1, True );
               break;
            /* On older CPUs, LZCNT behaves the same as BSR. */
            case 0xBD: /* REP BSR Gv,Ev */
               delta = dis_bs_E_G ( sorb, sz, delta + 1, False );
               break;
            case 0x1e: /* ENDBR — CET marker, decoded as a no-op */
               delta++;
               switch (getIByte(delta++)) {
               case 0xfa:
                  DIP("endbr64");
                  break;
               case 0xfb:
                  DIP("endbr32");
                  break;
               default:
                  goto decode_failure;
               }
               break;
            default:
               goto decode_failure;
            }
            break;

         case 0xA4: sz = 1;   /* REP MOVS<sz> */
                              /* fall through — byte form shares the code */
         case 0xA5:
            dis_REP_op ( &dres, X86CondAlways, dis_MOVS, sz, eip_orig, 
                                guest_EIP_bbstart+delta, "rep movs" );
            break;

         case 0xA6: sz = 1;   /* REPE CMP<sz> */
                              /* fall through */
         case 0xA7:
            dis_REP_op ( &dres, X86CondZ, dis_CMPS, sz, eip_orig, 
                                guest_EIP_bbstart+delta, "repe cmps" );
            break;

         case 0xAA: sz = 1;   /* REP STOS<sz> */
                              /* fall through */
         case 0xAB:
            dis_REP_op ( &dres, X86CondAlways, dis_STOS, sz, eip_orig, 
                                guest_EIP_bbstart+delta, "rep stos" );
            break;

         case 0xAC: sz = 1;   /* REP LODS<sz> */
                              /* fall through */
         case 0xAD:
            dis_REP_op ( &dres, X86CondAlways, dis_LODS, sz, eip_orig, 
                                guest_EIP_bbstart+delta, "rep lods" );
            break;

         case 0xAE: sz = 1;   /* REPE SCAS<sz> */
                              /* fall through */
         case 0xAF: 
            dis_REP_op ( &dres, X86CondZ, dis_SCAS, sz, eip_orig, 
                                guest_EIP_bbstart+delta, "repe scas" );
            break;
         
         case 0x90:           /* REP NOP (PAUSE) */
            /* a hint to the P4 re spin-wait loop */
            DIP("rep nop (P4 pause)\n");
            /* "observe" the hint.  The Vex client needs to be careful not
               to cause very long delays as a result, though. */
            jmp_lit(&dres, Ijk_Yield, ((Addr32)guest_EIP_bbstart)+delta);
            vassert(dres.whatNext == Dis_StopHere);
            break;

         case 0xC3:           /* REP RET, used to help out AMD cpus */
            dis_ret(&dres, 0);
            DIP("rep ret\n");
            break;

         default:
            goto decode_failure;
         }
         break;
      }
14886
   /* ------------------------ XCHG ----------------------- */

   /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
      prefix; hence it must be translated with an IRCAS (at least, the
      memory variant). */
      case 0x86: /* XCHG Gb,Eb */
         sz = 1;
         /* Fall through ... */
      case 0x87: /* XCHG Gv,Ev */
         modrm = getIByte(delta);
         ty = szToITy(sz);
         t1 = newTemp(ty); t2 = newTemp(ty);
         if (epartIsReg(modrm)) {
            /* reg-reg form: plain swap via two temps, no atomicity
               concerns. */
            assign(t1, getIReg(sz, eregOfRM(modrm)));
            assign(t2, getIReg(sz, gregOfRM(modrm)));
            putIReg(sz, gregOfRM(modrm), mkexpr(t1));
            putIReg(sz, eregOfRM(modrm), mkexpr(t2));
            delta++;
            DIP("xchg%c %s, %s\n", 
                nameISize(sz), nameIReg(sz,gregOfRM(modrm)), 
                               nameIReg(sz,eregOfRM(modrm)));
         } else {
            /* reg-mem form: the implicit LOCK# means the memory update
               must be expressed as a CAS; tell the caller to expect it. */
            *expect_CAS = True;
            addr = disAMode ( &alen, sorb, delta, dis_buf );
            assign( t1, loadLE(ty,mkexpr(addr)) );
            assign( t2, getIReg(sz,gregOfRM(modrm)) );
            casLE( mkexpr(addr),
                   mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
            putIReg( sz, gregOfRM(modrm), mkexpr(t1) );
            delta += alen;
            DIP("xchg%c %s, %s\n", nameISize(sz), 
                                   nameIReg(sz,gregOfRM(modrm)), dis_buf);
         }
         break;

      case 0x90: /* XCHG eAX,eAX */
         DIP("nop\n");
         break;
      case 0x91: /* XCHG eAX,eCX */
      case 0x92: /* XCHG eAX,eDX */
      case 0x93: /* XCHG eAX,eBX */
      case 0x94: /* XCHG eAX,eSP */
      case 0x95: /* XCHG eAX,eBP */
      case 0x96: /* XCHG eAX,eSI */
      case 0x97: /* XCHG eAX,eDI */
         codegen_xchg_eAX_Reg ( sz, opc - 0x90 );
         break;

   /* ------------------------ XLAT ----------------------- */

      case 0xD7: /* XLAT */
         if (sz != 4) goto decode_failure; /* sz == 2 is also allowed (0x66) */
         /* AL := mem8[seg:EBX + zero-extend(AL)] */
         putIReg( 
            1, 
            R_EAX/*AL*/,
            loadLE(Ity_I8, 
                   handleSegOverride( 
                      sorb, 
                      binop(Iop_Add32, 
                            getIReg(4, R_EBX), 
                            unop(Iop_8Uto32, getIReg(1, R_EAX/*AL*/))))));

         DIP("xlat%c [ebx]\n", nameISize(sz));
         break;
14951
   /* ------------------------ IN / OUT ----------------------- */
   /* Port I/O cannot be expressed in pure IR; both directions are
      routed through dirty helpers (x86g_dirtyhelper_IN/OUT). */
 
      case 0xE4: /* IN imm8, AL */
         sz = 1; 
         t1 = newTemp(Ity_I32);
         abyte = getIByte(delta); delta++;
         assign(t1, mkU32( abyte & 0xFF ));
         DIP("in%c $%d,%s\n", nameISize(sz), abyte, nameIReg(sz,R_EAX));
         goto do_IN;
      case 0xE5: /* IN imm8, eAX */
         vassert(sz == 2 || sz == 4);
         t1 = newTemp(Ity_I32);
         abyte = getIByte(delta); delta++;
         assign(t1, mkU32( abyte & 0xFF ));
         DIP("in%c $%d,%s\n", nameISize(sz), abyte, nameIReg(sz,R_EAX));
         goto do_IN;
      case 0xEC: /* IN %DX, AL */
         sz = 1; 
         t1 = newTemp(Ity_I32);
         assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX)));
         DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX), 
                             nameIReg(sz,R_EAX));
         goto do_IN;
      case 0xED: /* IN %DX, eAX */
         vassert(sz == 2 || sz == 4);
         t1 = newTemp(Ity_I32);
         assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX)));
         DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX), 
                             nameIReg(sz,R_EAX));
         goto do_IN;
      do_IN: {
         /* At this point, sz indicates the width, and t1 is a 32-bit
            value giving port number. */
         IRDirty* d;
         vassert(sz == 1 || sz == 2 || sz == 4);
         ty = szToITy(sz);
         t2 = newTemp(Ity_I32);
         d = unsafeIRDirty_1_N( 
                t2,
                0/*regparms*/, 
                "x86g_dirtyhelper_IN", 
                &x86g_dirtyhelper_IN,
                mkIRExprVec_2( mkexpr(t1), mkU32(sz) )
             );
         /* do the call, dumping the result in t2. */
         stmt( IRStmt_Dirty(d) );
         putIReg(sz, R_EAX, narrowTo( ty, mkexpr(t2) ) );
         break;
      }

      case 0xE6: /* OUT AL, imm8 */
         sz = 1;
         t1 = newTemp(Ity_I32);
         abyte = getIByte(delta); delta++;
         assign( t1, mkU32( abyte & 0xFF ) );
         DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), abyte);
         goto do_OUT;
      case 0xE7: /* OUT eAX, imm8 */
         vassert(sz == 2 || sz == 4);
         t1 = newTemp(Ity_I32);
         abyte = getIByte(delta); delta++;
         assign( t1, mkU32( abyte & 0xFF ) );
         DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), abyte);
         goto do_OUT;
      case 0xEE: /* OUT AL, %DX */
         sz = 1;
         t1 = newTemp(Ity_I32);
         assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) );
         DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX),
                              nameIReg(2,R_EDX));
         goto do_OUT;
      case 0xEF: /* OUT eAX, %DX */
         vassert(sz == 2 || sz == 4);
         t1 = newTemp(Ity_I32);
         assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) );
         DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX),
                              nameIReg(2,R_EDX));
         goto do_OUT;
      do_OUT: {
         /* At this point, sz indicates the width, and t1 is a 32-bit
            value giving port number. */
         IRDirty* d;
         vassert(sz == 1 || sz == 2 || sz == 4);
         ty = szToITy(sz);
         d = unsafeIRDirty_0_N( 
                0/*regparms*/, 
                "x86g_dirtyhelper_OUT", 
                &x86g_dirtyhelper_OUT,
                mkIRExprVec_3( mkexpr(t1),
                               widenUto32( getIReg(sz, R_EAX) ), 
                               mkU32(sz) )
             );
         stmt( IRStmt_Dirty(d) );
         break;
      }
15047
   /* ------------------------ (Grp1 extensions) ---------- */
   /* Grp1: immediate-operand ALU ops selected by the modrm reg field.
      The immediate lives after the addressing-mode bytes, so it is
      fetched at delta + am_sz; dis_Grp1 consumes both. */

      case 0x82: /* Grp1 Ib,Eb too.  Apparently this is the same as
                    case 0x80, but only in 32-bit mode. */
         /* fallthru */
      case 0x80: /* Grp1 Ib,Eb */
         modrm = getIByte(delta);
         am_sz = lengthAMode(delta);
         sz    = 1;
         d_sz  = 1;
         d32   = getUChar(delta + am_sz);
         delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
         break;

      case 0x81: /* Grp1 Iv,Ev */
         modrm = getIByte(delta);
         am_sz = lengthAMode(delta);
         d_sz  = sz;
         d32   = getUDisp(d_sz, delta + am_sz);
         delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
         break;

      case 0x83: /* Grp1 Ib,Ev */
         /* Byte immediate, sign-extended to the operand size. */
         modrm = getIByte(delta);
         am_sz = lengthAMode(delta);
         d_sz  = 1;
         d32   = getSDisp8(delta + am_sz);
         delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
         break;

   /* ------------------------ (Grp2 extensions) ---------- */
   /* Grp2: shift/rotate ops selected by the modrm reg field; the
      shift amount is an immediate, the constant 1, or %cl. */

      case 0xC0: { /* Grp2 Ib,Eb */
         Bool decode_OK = True;
         modrm = getIByte(delta);
         am_sz = lengthAMode(delta);
         d_sz  = 1;
         d32   = getUChar(delta + am_sz);
         sz    = 1;
         delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 
                            mkU8(d32 & 0xFF), NULL, &decode_OK );
         if (!decode_OK)
            goto decode_failure;
         break;
      }
      case 0xC1: { /* Grp2 Ib,Ev */
         Bool decode_OK = True;
         modrm = getIByte(delta);
         am_sz = lengthAMode(delta);
         d_sz  = 1;
         d32   = getUChar(delta + am_sz);
         delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 
                            mkU8(d32 & 0xFF), NULL, &decode_OK );
         if (!decode_OK)
            goto decode_failure;
         break;
      }
      case 0xD0: { /* Grp2 1,Eb */
         Bool decode_OK = True;
         modrm = getIByte(delta);
         am_sz = lengthAMode(delta);
         d_sz  = 0;
         d32   = 1;
         sz    = 1;
         delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 
                            mkU8(d32), NULL, &decode_OK );
         if (!decode_OK)
            goto decode_failure;
         break;
      }
      case 0xD1: { /* Grp2 1,Ev */
         Bool decode_OK = True;
         modrm = getUChar(delta);
         am_sz = lengthAMode(delta);
         d_sz  = 0;
         d32   = 1;
         delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 
                            mkU8(d32), NULL, &decode_OK );
         if (!decode_OK)
            goto decode_failure;
         break;
      }
      case 0xD2: { /* Grp2 CL,Eb */
         Bool decode_OK = True;
         modrm = getUChar(delta);
         am_sz = lengthAMode(delta);
         d_sz  = 0;
         sz    = 1;
         delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 
                            getIReg(1,R_ECX), "%cl", &decode_OK );
         if (!decode_OK)
            goto decode_failure;
         break;
      }
      case 0xD3: { /* Grp2 CL,Ev */
         Bool decode_OK = True;
         modrm = getIByte(delta);
         am_sz = lengthAMode(delta);
         d_sz  = 0;
         delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 
                            getIReg(1,R_ECX), "%cl", &decode_OK );
         if (!decode_OK)
            goto decode_failure;
         break;
      }
15153
15154 /* ------------------------ (Grp3 extensions) ---------- */
15155
15156 case 0xF6: { /* Grp3 Eb */
15157 Bool decode_OK = True;
15158 delta = dis_Grp3 ( sorb, pfx_lock, 1, delta, &decode_OK );
15159 if (!decode_OK)
15160 goto decode_failure;
15161 break;
15162 }
15163 case 0xF7: { /* Grp3 Ev */
15164 Bool decode_OK = True;
15165 delta = dis_Grp3 ( sorb, pfx_lock, sz, delta, &decode_OK );
15166 if (!decode_OK)
15167 goto decode_failure;
15168 break;
15169 }
15170
15171 /* ------------------------ (Grp4 extensions) ---------- */
15172
15173 case 0xFE: { /* Grp4 Eb */
15174 Bool decode_OK = True;
15175 delta = dis_Grp4 ( sorb, pfx_lock, delta, &decode_OK );
15176 if (!decode_OK)
15177 goto decode_failure;
15178 break;
15179 }
15180
15181 /* ------------------------ (Grp5 extensions) ---------- */
15182
15183 case 0xFF: { /* Grp5 Ev */
15184 Bool decode_OK = True;
15185 delta = dis_Grp5 ( sorb, pfx_lock, sz, delta, &dres, &decode_OK );
15186 if (!decode_OK)
15187 goto decode_failure;
15188 break;
15189 }
15190
15191 /* -------------------------- CLI/STI ------------------- */
15192 /* We treat them as NOP */
15193 case 0xFA: {
15194 DIP("cli\n");
15195 /* vvv fallthrough */
15196 }
15197 case 0xFB:
15198 DIP("sti\n");
         /* No interrupt-flag state is modelled: instead we end the
            block with an Ijk_Privileged jump and let the dispatcher
            decide how to handle it. */
15200 jmp_lit(&dres, Ijk_Privileged, ((Addr32)guest_EIP_bbstart) + delta);
15201 vassert(dres.whatNext == Dis_StopHere);
15202 break;
15203
15204 /* -------------------------- halt ---------------------- */
15205 case 0xF4: { /* hlt */
15206 jmp_lit(&dres, Ijk_SigTRAP, ((Addr32)guest_EIP_bbstart)+delta);
15207 vassert(dres.whatNext == Dis_StopHere);
15208 DIP("hlt\n");
15209 break;
15210 }
15211
15212 /* ------------------------ Escapes to 2-byte opcodes -- */
15213
15214 case 0x0F: {
15215 opc = getIByte(delta); delta++;
15216 switch (opc) {
15217
15218 case 0x20: { /* mov r32, crX (X \in \{0, 2, 3, 4}) */
15219 UChar rm = getIByte(delta++);
15220 /* We only support cr0 for the moment */
15221 if (gregOfRM(rm) != 0)
15222 goto decode_failure;
15223 putIReg(4, gregOfRM(rm), mkU32(archinfo->x86_cr0));
15224 break;
15225 }
15226 case 0x22: {/* mov crX (X \in \{0, 2, 3, 4}), r32 */
15227 UChar rm = getIByte(delta++);
15228 /* We only support cr0 for the moment */
15229 if (gregOfRM(rm) != 0)
15230 goto decode_failure;
15231 IRTemp value = newTemp(Ity_I32);
15232 assign(value, getIReg(4, eregOfRM(rm)));
15233 IRDirty* d = unsafeIRDirty_0_N (
15234 0/*regparms*/,
15235 "x86g_dirtyhelper_write_cr0",
15236 &x86g_dirtyhelper_write_cr0,
15237 mkIRExprVec_1( mkexpr(value) )
15238 );
15239 stmt( IRStmt_Dirty(d) );
15240 dres.whatNext = Dis_StopHere;
15241 dres.jk_StopHere = Ijk_Yield;
15242 stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_bbstart + delta) ) );
15243 break;
15244 }
15245
15246 case 0x09: /* WBINVD */
15247 /* We treat it as NOP */
15248 break;
15249
15250 /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */
15251
15252 case 0xBA: { /* Grp8 Ib,Ev */
15253 Bool decode_OK = False;
15254 modrm = getUChar(delta);
15255 am_sz = lengthAMode(delta);
15256 d32 = getSDisp8(delta + am_sz);
15257 delta = dis_Grp8_Imm ( sorb, pfx_lock, delta, modrm,
15258 am_sz, sz, d32, &decode_OK );
15259 if (!decode_OK)
15260 goto decode_failure;
15261 break;
15262 }
15263
15264 /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */
15265
15266 case 0xBC: /* BSF Gv,Ev */
15267 delta = dis_bs_E_G ( sorb, sz, delta, True );
15268 break;
15269 case 0xBD: /* BSR Gv,Ev */
15270 delta = dis_bs_E_G ( sorb, sz, delta, False );
15271 break;
15272
15273 /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */
15274
15275 case 0xC8: /* BSWAP %eax */
15276 case 0xC9:
15277 case 0xCA:
15278 case 0xCB:
15279 case 0xCC:
15280 case 0xCD:
15281 case 0xCE:
15282 case 0xCF: /* BSWAP %edi */
15283 /* AFAICS from the Intel docs, this only exists at size 4. */
15284
15285 /* however, we are in the business of emulating stuff, and an
15286 * emulator has no business crashing when it sees an "undefined"
15287 * instruction. My CPU just clears the lowest two bytes of the
15288 * register so let's implement that. */
15289 if (sz == 2) {
15290 putIReg(2, opc-0xC8, mkU16(0));
15291 DIP("bswapw %s (UNDEFINED)\n", nameIReg(2, opc-0xC8));
15292 break;
15293 }
15294
15295 if (sz != 4) goto decode_failure;
15296
15297 t1 = newTemp(Ity_I32);
15298 assign( t1, getIReg(4, opc-0xC8) );
15299 t2 = math_BSWAP(t1, Ity_I32);
15300
15301 putIReg(4, opc-0xC8, mkexpr(t2));
15302 DIP("bswapl %s\n", nameIReg(4, opc-0xC8));
15303 break;
15304
15305 /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */
15306
15307 case 0xA3: /* BT Gv,Ev */
15308 delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpNone );
15309 break;
15310 case 0xB3: /* BTR Gv,Ev */
15311 delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpReset );
15312 break;
15313 case 0xAB: /* BTS Gv,Ev */
15314 delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpSet );
15315 break;
15316 case 0xBB: /* BTC Gv,Ev */
15317 delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpComp );
15318 break;
15319
15320 /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */
15321
15322 case 0x40:
15323 case 0x41:
15324 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
15325 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
15326 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
15327 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
15328 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
15329 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
15330 case 0x48: /* CMOVSb (cmov negative) */
15331 case 0x49: /* CMOVSb (cmov not negative) */
15332 case 0x4A: /* CMOVP (cmov parity even) */
15333 case 0x4B: /* CMOVNP (cmov parity odd) */
15334 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
15335 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
15336 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
15337 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
15338 delta = dis_cmov_E_G(sorb, sz, (X86Condcode)(opc - 0x40), delta);
15339 break;
15340
15341 /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */
15342
15343 case 0xB0: /* CMPXCHG Gb,Eb */
15344 delta = dis_cmpxchg_G_E ( sorb, pfx_lock, 1, delta );
15345 break;
15346 case 0xB1: /* CMPXCHG Gv,Ev */
15347 delta = dis_cmpxchg_G_E ( sorb, pfx_lock, sz, delta );
15348 break;
15349
15350 case 0xC7: { /* CMPXCHG8B Gv (0F C7 /1) */
15351 IRTemp expdHi = newTemp(Ity_I32);
15352 IRTemp expdLo = newTemp(Ity_I32);
15353 IRTemp dataHi = newTemp(Ity_I32);
15354 IRTemp dataLo = newTemp(Ity_I32);
15355 IRTemp oldHi = newTemp(Ity_I32);
15356 IRTemp oldLo = newTemp(Ity_I32);
15357 IRTemp flags_old = newTemp(Ity_I32);
15358 IRTemp flags_new = newTemp(Ity_I32);
15359 IRTemp success = newTemp(Ity_I1);
15360
15361 /* Translate this using a DCAS, even if there is no LOCK
15362 prefix. Life is too short to bother with generating two
15363 different translations for the with/without-LOCK-prefix
15364 cases. */
15365 *expect_CAS = True;
15366
15367 /* Decode, and generate address. */
15368 if (sz != 4) goto decode_failure;
15369 modrm = getIByte(delta);
15370 if (epartIsReg(modrm)) goto decode_failure;
15371 if (gregOfRM(modrm) != 1) goto decode_failure;
15372 addr = disAMode ( &alen, sorb, delta, dis_buf );
15373 delta += alen;
15374
15375 /* Get the expected and new values. */
15376 assign( expdHi, getIReg(4,R_EDX) );
15377 assign( expdLo, getIReg(4,R_EAX) );
15378 assign( dataHi, getIReg(4,R_ECX) );
15379 assign( dataLo, getIReg(4,R_EBX) );
15380
15381 /* Do the DCAS */
15382 stmt( IRStmt_CAS(
15383 mkIRCAS( oldHi, oldLo,
15384 Iend_LE, mkexpr(addr),
15385 mkexpr(expdHi), mkexpr(expdLo),
15386 mkexpr(dataHi), mkexpr(dataLo)
15387 )));
15388
15389 /* success when oldHi:oldLo == expdHi:expdLo */
15390 assign( success,
15391 binop(Iop_CasCmpEQ32,
15392 binop(Iop_Or32,
15393 binop(Iop_Xor32, mkexpr(oldHi), mkexpr(expdHi)),
15394 binop(Iop_Xor32, mkexpr(oldLo), mkexpr(expdLo))
15395 ),
15396 mkU32(0)
15397 ));
15398
15399 /* If the DCAS is successful, that is to say oldHi:oldLo ==
15400 expdHi:expdLo, then put expdHi:expdLo back in EDX:EAX,
15401 which is where they came from originally. Both the actual
15402 contents of these two regs, and any shadow values, are
15403 unchanged. If the DCAS fails then we're putting into
15404 EDX:EAX the value seen in memory. */
15405 putIReg(4, R_EDX,
15406 IRExpr_ITE( mkexpr(success),
15407 mkexpr(expdHi), mkexpr(oldHi)
15408 ));
15409 putIReg(4, R_EAX,
15410 IRExpr_ITE( mkexpr(success),
15411 mkexpr(expdLo), mkexpr(oldLo)
15412 ));
15413
15414 /* Copy the success bit into the Z flag and leave the others
15415 unchanged */
15416 assign( flags_old, widenUto32(mk_x86g_calculate_eflags_all()));
15417 assign(
15418 flags_new,
15419 binop(Iop_Or32,
15420 binop(Iop_And32, mkexpr(flags_old),
15421 mkU32(~X86G_CC_MASK_Z)),
15422 binop(Iop_Shl32,
15423 binop(Iop_And32,
15424 unop(Iop_1Uto32, mkexpr(success)), mkU32(1)),
15425 mkU8(X86G_CC_SHIFT_Z)) ));
15426
15427 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
15428 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
15429 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
15430 /* Set NDEP even though it isn't used. This makes
15431 redundant-PUT elimination of previous stores to this field
15432 work better. */
15433 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
15434
15435 /* Sheesh. Aren't you glad it was me and not you that had to
15436 write and validate all this grunge? */
15437
15438 DIP("cmpxchg8b %s\n", dis_buf);
15439 break;
15440 }
15441
15442 /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */
15443
15444 case 0xA2: { /* CPUID */
15445 /* Uses dirty helper:
15446 void dirtyhelper_CPUID_sse[012] ( VexGuestX86State* )
15447 declared to mod eax, wr ebx, ecx, edx
15448 */
15449 IRDirty* d = NULL;
15450 void* fAddr = NULL;
15451 const HChar* fName = NULL;
15452 if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3) {
15453 fName = "x86g_dirtyhelper_CPUID_sse3";
15454 fAddr = &x86g_dirtyhelper_CPUID_sse3;
15455 }
15456 else
15457 if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2) {
15458 fName = "x86g_dirtyhelper_CPUID_sse2";
15459 fAddr = &x86g_dirtyhelper_CPUID_sse2;
15460 }
15461 else
15462 if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE1) {
15463 fName = "x86g_dirtyhelper_CPUID_sse1";
15464 fAddr = &x86g_dirtyhelper_CPUID_sse1;
15465 }
15466 else
15467 if (archinfo->hwcaps & VEX_HWCAPS_X86_MMXEXT) {
15468 fName = "x86g_dirtyhelper_CPUID_mmxext";
15469 fAddr = &x86g_dirtyhelper_CPUID_mmxext;
15470 }
15471 else
15472 if (archinfo->hwcaps == 0/*no SSE*/) {
15473 fName = "x86g_dirtyhelper_CPUID_sse0";
15474 fAddr = &x86g_dirtyhelper_CPUID_sse0;
15475 } else
15476 vpanic("disInstr(x86)(cpuid)");
15477
15478 vassert(fName); vassert(fAddr);
15479 d = unsafeIRDirty_0_N ( 0/*regparms*/,
15480 fName, fAddr, mkIRExprVec_1(IRExpr_GSPTR()) );
15481 /* declare guest state effects */
15482 d->nFxState = 4;
15483 vex_bzero(&d->fxState, sizeof(d->fxState));
15484 d->fxState[0].fx = Ifx_Modify;
15485 d->fxState[0].offset = OFFB_EAX;
15486 d->fxState[0].size = 4;
15487 d->fxState[1].fx = Ifx_Write;
15488 d->fxState[1].offset = OFFB_EBX;
15489 d->fxState[1].size = 4;
15490 d->fxState[2].fx = Ifx_Modify;
15491 d->fxState[2].offset = OFFB_ECX;
15492 d->fxState[2].size = 4;
15493 d->fxState[3].fx = Ifx_Write;
15494 d->fxState[3].offset = OFFB_EDX;
15495 d->fxState[3].size = 4;
15496 /* execute the dirty call, side-effecting guest state */
15497 stmt( IRStmt_Dirty(d) );
15498 /* CPUID is a serialising insn. So, just in case someone is
15499 using it as a memory fence ... */
15500 stmt( IRStmt_MBE(Imbe_Fence) );
15501 DIP("cpuid\n");
15502 break;
15503 }
15504
15505 //-- if (!VG_(cpu_has_feature)(VG_X86_FEAT_CPUID))
15506 //-- goto decode_failure;
15507 //--
15508 //-- t1 = newTemp(cb);
15509 //-- t2 = newTemp(cb);
15510 //-- t3 = newTemp(cb);
15511 //-- t4 = newTemp(cb);
15512 //-- uInstr0(cb, CALLM_S, 0);
15513 //--
15514 //-- uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t1);
15515 //-- uInstr1(cb, PUSH, 4, TempReg, t1);
15516 //--
15517 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2);
15518 //-- uLiteral(cb, 0);
15519 //-- uInstr1(cb, PUSH, 4, TempReg, t2);
15520 //--
15521 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t3);
15522 //-- uLiteral(cb, 0);
15523 //-- uInstr1(cb, PUSH, 4, TempReg, t3);
15524 //--
15525 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4);
15526 //-- uLiteral(cb, 0);
15527 //-- uInstr1(cb, PUSH, 4, TempReg, t4);
15528 //--
15529 //-- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_CPUID));
15530 //-- uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty);
15531 //--
15532 //-- uInstr1(cb, POP, 4, TempReg, t4);
15533 //-- uInstr2(cb, PUT, 4, TempReg, t4, ArchReg, R_EDX);
15534 //--
15535 //-- uInstr1(cb, POP, 4, TempReg, t3);
15536 //-- uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ECX);
15537 //--
15538 //-- uInstr1(cb, POP, 4, TempReg, t2);
15539 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBX);
15540 //--
15541 //-- uInstr1(cb, POP, 4, TempReg, t1);
15542 //-- uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_EAX);
15543 //--
15544 //-- uInstr0(cb, CALLM_E, 0);
15545 //-- DIP("cpuid\n");
15546 //-- break;
15547 //--
15548 /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */
15549
15550 case 0xB6: /* MOVZXb Eb,Gv */
15551 if (sz != 2 && sz != 4)
15552 goto decode_failure;
15553 delta = dis_movx_E_G ( sorb, delta, 1, sz, False );
15554 break;
15555
15556 case 0xB7: /* MOVZXw Ew,Gv */
15557 if (sz != 4)
15558 goto decode_failure;
15559 delta = dis_movx_E_G ( sorb, delta, 2, 4, False );
15560 break;
15561
15562 case 0xBE: /* MOVSXb Eb,Gv */
15563 if (sz != 2 && sz != 4)
15564 goto decode_failure;
15565 delta = dis_movx_E_G ( sorb, delta, 1, sz, True );
15566 break;
15567
15568 case 0xBF: /* MOVSXw Ew,Gv */
15569 if (sz != 4 && /* accept movsww, sigh, see #250799 */sz != 2)
15570 goto decode_failure;
15571 delta = dis_movx_E_G ( sorb, delta, 2, sz, True );
15572 break;
15573
15574 //-- /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */
15575 //--
15576 //-- case 0xC3: /* MOVNTI Gv,Ev */
15577 //-- vg_assert(sz == 4);
15578 //-- modrm = getUChar(eip);
15579 //-- vg_assert(!epartIsReg(modrm));
15580 //-- t1 = newTemp(cb);
15581 //-- uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1);
15582 //-- pair = disAMode ( cb, sorb, eip, dis_buf );
15583 //-- t2 = LOW24(pair);
15584 //-- eip += HI8(pair);
15585 //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
15586 //-- DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf);
15587 //-- break;
15588
15589 /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */
15590
15591 case 0xAF: /* IMUL Ev, Gv */
15592 delta = dis_mul_E_G ( sorb, sz, delta );
15593 break;
15594
15595 /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */
15596
15597 case 0x1F:
15598 modrm = getUChar(delta);
15599 if (epartIsReg(modrm)) goto decode_failure;
15600 addr = disAMode ( &alen, sorb, delta, dis_buf );
15601 delta += alen;
15602 DIP("nop%c %s\n", nameISize(sz), dis_buf);
15603 break;
15604
15605 /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
15606 case 0x80:
15607 case 0x81:
15608 case 0x82: /* JBb/JNAEb (jump below) */
15609 case 0x83: /* JNBb/JAEb (jump not below) */
15610 case 0x84: /* JZb/JEb (jump zero) */
15611 case 0x85: /* JNZb/JNEb (jump not zero) */
15612 case 0x86: /* JBEb/JNAb (jump below or equal) */
15613 case 0x87: /* JNBEb/JAb (jump not below or equal) */
15614 case 0x88: /* JSb (jump negative) */
15615 case 0x89: /* JSb (jump not negative) */
15616 case 0x8A: /* JP (jump parity even) */
15617 case 0x8B: /* JNP/JPO (jump parity odd) */
15618 case 0x8C: /* JLb/JNGEb (jump less) */
15619 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
15620 case 0x8E: /* JLEb/JNGb (jump less or equal) */
15621 case 0x8F: /* JGb/JNLEb (jump greater) */
15622 { Int jmpDelta;
15623 const HChar* comment = "";
15624 jmpDelta = (Int)getUDisp(current_sz_data, delta);
15625 d32 = (((Addr32)guest_EIP_bbstart)+delta+current_sz_data) + jmpDelta;
15626 delta += current_sz_data;
15627 if (resteerCisOk
15628 && vex_control.guest_chase_cond
15629 && (Addr32)d32 != (Addr32)guest_EIP_bbstart
15630 && jmpDelta < 0
15631 && resteerOkFn( callback_opaque, (Addr32)d32) ) {
15632 /* Speculation: assume this backward branch is taken. So
15633 we need to emit a side-exit to the insn following this
15634 one, on the negation of the condition, and continue at
15635 the branch target address (d32). If we wind up back at
15636 the first instruction of the trace, just stop; it's
15637 better to let the IR loop unroller handle that case.*/
15638 stmt( IRStmt_Exit(
15639 mk_x86g_calculate_condition((X86Condcode)
15640 (1 ^ (opc - 0x80))),
15641 Ijk_Boring,
15642 IRConst_U32(guest_EIP_bbstart+delta),
15643 OFFB_EIP ) );
15644 dres.whatNext = Dis_ResteerC;
15645 dres.continueAt = (Addr32)d32;
15646 comment = "(assumed taken)";
15647 }
15648 else
15649 if (resteerCisOk
15650 && vex_control.guest_chase_cond
15651 && (Addr32)d32 != (Addr32)guest_EIP_bbstart
15652 && jmpDelta >= 0
15653 && resteerOkFn( callback_opaque,
15654 (Addr32)(guest_EIP_bbstart+delta)) ) {
15655 /* Speculation: assume this forward branch is not taken.
15656 So we need to emit a side-exit to d32 (the dest) and
15657 continue disassembling at the insn immediately
15658 following this one. */
15659 stmt( IRStmt_Exit(
15660 mk_x86g_calculate_condition((X86Condcode)(opc - 0x80)),
15661 Ijk_Boring,
15662 IRConst_U32(d32),
15663 OFFB_EIP ) );
15664 dres.whatNext = Dis_ResteerC;
15665 dres.continueAt = guest_EIP_bbstart + delta;
15666 comment = "(assumed not taken)";
15667 }
15668 else {
15669 /* Conservative default translation - end the block at
15670 this point. */
15671 jcc_01( &dres, (X86Condcode)(opc - 0x80),
15672 (Addr32)(guest_EIP_bbstart+delta), d32);
15673 vassert(dres.whatNext == Dis_StopHere);
15674 }
15675 DIP("j%s-32 0x%x %s\n", name_X86Condcode(opc - 0x80), d32, comment);
15676 break;
15677 }
15678
15679 /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
15680 case 0x31: { /* RDTSC */
15681 IRTemp val = newTemp(Ity_I64);
15682 IRExpr** args = mkIRExprVec_0();
15683 IRDirty* d = unsafeIRDirty_1_N (
15684 val,
15685 0/*regparms*/,
15686 "x86g_dirtyhelper_RDTSC",
15687 &x86g_dirtyhelper_RDTSC,
15688 args
15689 );
15690 /* execute the dirty call, dumping the result in val. */
15691 stmt( IRStmt_Dirty(d) );
15692 putIReg(4, R_EDX, unop(Iop_64HIto32, mkexpr(val)));
15693 putIReg(4, R_EAX, unop(Iop_64to32, mkexpr(val)));
15694 DIP("rdtsc\n");
15695 break;
15696 }
15697
15698 /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */
15699
15700 case 0xA1: /* POP %FS */
15701 dis_pop_segreg( R_FS, sz ); break;
15702 case 0xA9: /* POP %GS */
15703 dis_pop_segreg( R_GS, sz ); break;
15704
15705 case 0xA0: /* PUSH %FS */
15706 dis_push_segreg( R_FS, sz ); break;
15707 case 0xA8: /* PUSH %GS */
15708 dis_push_segreg( R_GS, sz ); break;
15709
15710 /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
15711 case 0x90:
15712 case 0x91:
15713 case 0x92: /* set-Bb/set-NAEb (jump below) */
15714 case 0x93: /* set-NBb/set-AEb (jump not below) */
15715 case 0x94: /* set-Zb/set-Eb (jump zero) */
15716 case 0x95: /* set-NZb/set-NEb (jump not zero) */
15717 case 0x96: /* set-BEb/set-NAb (jump below or equal) */
15718 case 0x97: /* set-NBEb/set-Ab (jump not below or equal) */
15719 case 0x98: /* set-Sb (jump negative) */
15720 case 0x99: /* set-Sb (jump not negative) */
15721 case 0x9A: /* set-P (jump parity even) */
15722 case 0x9B: /* set-NP (jump parity odd) */
15723 case 0x9C: /* set-Lb/set-NGEb (jump less) */
15724 case 0x9D: /* set-GEb/set-NLb (jump greater or equal) */
15725 case 0x9E: /* set-LEb/set-NGb (jump less or equal) */
15726 case 0x9F: /* set-Gb/set-NLEb (jump greater) */
15727 t1 = newTemp(Ity_I8);
15728 assign( t1, unop(Iop_1Uto8,mk_x86g_calculate_condition(opc-0x90)) );
15729 modrm = getIByte(delta);
15730 if (epartIsReg(modrm)) {
15731 delta++;
15732 putIReg(1, eregOfRM(modrm), mkexpr(t1));
15733 DIP("set%s %s\n", name_X86Condcode(opc-0x90),
15734 nameIReg(1,eregOfRM(modrm)));
15735 } else {
15736 addr = disAMode ( &alen, sorb, delta, dis_buf );
15737 delta += alen;
15738 storeLE( mkexpr(addr), mkexpr(t1) );
15739 DIP("set%s %s\n", name_X86Condcode(opc-0x90), dis_buf);
15740 }
15741 break;
15742
15743 /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */
15744
15745 case 0xA4: /* SHLDv imm8,Gv,Ev */
15746 modrm = getIByte(delta);
15747 d32 = delta + lengthAMode(delta);
15748 vex_sprintf(dis_buf, "$%d", getIByte(d32));
15749 delta = dis_SHLRD_Gv_Ev (
15750 sorb, delta, modrm, sz,
15751 mkU8(getIByte(d32)), True, /* literal */
15752 dis_buf, True );
15753 break;
15754 case 0xA5: /* SHLDv %cl,Gv,Ev */
15755 modrm = getIByte(delta);
15756 delta = dis_SHLRD_Gv_Ev (
15757 sorb, delta, modrm, sz,
15758 getIReg(1,R_ECX), False, /* not literal */
15759 "%cl", True );
15760 break;
15761
15762 case 0xAC: /* SHRDv imm8,Gv,Ev */
15763 modrm = getIByte(delta);
15764 d32 = delta + lengthAMode(delta);
15765 vex_sprintf(dis_buf, "$%d", getIByte(d32));
15766 delta = dis_SHLRD_Gv_Ev (
15767 sorb, delta, modrm, sz,
15768 mkU8(getIByte(d32)), True, /* literal */
15769 dis_buf, False );
15770 break;
15771 case 0xAD: /* SHRDv %cl,Gv,Ev */
15772 modrm = getIByte(delta);
15773 delta = dis_SHLRD_Gv_Ev (
15774 sorb, delta, modrm, sz,
15775 getIReg(1,R_ECX), False, /* not literal */
15776 "%cl", False );
15777 break;
15778
15779 /* =-=-=-=-=-=-=-=-=- SYSENTER -=-=-=-=-=-=-=-=-=-= */
15780
15781 case 0x34:
         /* Simple implementation needing a long explanation.
15783
15784 sysenter is a kind of syscall entry. The key thing here
15785 is that the return address is not known -- that is
15786 something that is beyond Vex's knowledge. So this IR
15787 forces a return to the scheduler, which can do what it
            likes to simulate the sysenter, but it MUST set this
15789 thread's guest_EIP field with the continuation address
15790 before resuming execution. If that doesn't happen, the
15791 thread will jump to address zero, which is probably
15792 fatal.
15793 */
15794
15795 /* Note where we are, so we can back up the guest to this
15796 point if the syscall needs to be restarted. */
15797 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
15798 mkU32(guest_EIP_curr_instr) ) );
15799 jmp_lit(&dres, Ijk_Sys_sysenter, 0/*bogus next EIP value*/);
15800 vassert(dres.whatNext == Dis_StopHere);
15801 DIP("sysenter");
15802 break;
15803
15804 /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */
15805
15806 case 0xC0: { /* XADD Gb,Eb */
15807 Bool decodeOK;
15808 delta = dis_xadd_G_E ( sorb, pfx_lock, 1, delta, &decodeOK );
15809 if (!decodeOK) goto decode_failure;
15810 break;
15811 }
15812 case 0xC1: { /* XADD Gv,Ev */
15813 Bool decodeOK;
15814 delta = dis_xadd_G_E ( sorb, pfx_lock, sz, delta, &decodeOK );
15815 if (!decodeOK) goto decode_failure;
15816 break;
15817 }
15818
15819 /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */
15820
15821 case 0x71:
15822 case 0x72:
15823 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
15824
15825 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
15826 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
15827 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
15828 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
15829
15830 case 0xFC:
15831 case 0xFD:
15832 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
15833
15834 case 0xEC:
15835 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
15836
15837 case 0xDC:
15838 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
15839
15840 case 0xF8:
15841 case 0xF9:
15842 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
15843
15844 case 0xE8:
15845 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
15846
15847 case 0xD8:
15848 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
15849
15850 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
15851 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
15852
15853 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
15854
15855 case 0x74:
15856 case 0x75:
15857 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
15858
15859 case 0x64:
15860 case 0x65:
15861 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
15862
15863 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
15864 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
15865 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
15866
15867 case 0x68:
15868 case 0x69:
15869 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
15870
15871 case 0x60:
15872 case 0x61:
15873 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
15874
15875 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
15876 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
15877 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
15878 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
15879
15880 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
15881 case 0xF2:
15882 case 0xF3:
15883
15884 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
15885 case 0xD2:
15886 case 0xD3:
15887
15888 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
15889 case 0xE2:
15890 {
15891 Int delta0 = delta-1;
15892 Bool decode_OK = False;
15893
15894 /* If sz==2 this is SSE, and we assume sse idec has
15895 already spotted those cases by now. */
15896 if (sz != 4)
15897 goto decode_failure;
15898
15899 delta = dis_MMX ( &decode_OK, sorb, sz, delta-1 );
15900 if (!decode_OK) {
15901 delta = delta0;
15902 goto decode_failure;
15903 }
15904 break;
15905 }
15906
15907 case 0x0E: /* FEMMS */
15908 case 0x77: /* EMMS */
15909 if (sz != 4)
15910 goto decode_failure;
15911 do_EMMS_preamble();
15912 DIP("{f}emms\n");
15913 break;
15914
15915 /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */
15916 case 0x01: /* 0F 01 /0 -- SGDT */
15917 /* 0F 01 /1 -- SIDT */
15918 /* 0F 01 /2 -- LGDT */
15919 /* 0F 01 /3 -- LIDT */
15920 {
15921 /* This is really revolting, but ... since each processor
15922 (core) only has one IDT and one GDT, just let the guest
15923 see it (pass-through semantics). I can't see any way to
15924 construct a faked-up value, so don't bother to try. */
15925 Int g;
15926 modrm = getUChar(delta);
15927 if (epartIsReg(modrm))
15928 goto decode_failure;
15929
15930 g = gregOfRM(modrm);
15931 if (g < 0 || g > 3)
15932 goto decode_failure;
15933
15934 addr = disAMode ( &alen, sorb, delta, dis_buf );
15935 delta += alen;
15936
15937 IRDirty* d = NULL;
15938 switch (g) {
15939 case 0: DIP("sgdt %s\n", dis_buf);
15940 case 1: DIP("sidt %s\n", dis_buf);
15941 d = unsafeIRDirty_0_N (
15942 0/*regparms*/,
15943 "x86g_dirtyhelper_SxDT",
15944 &x86g_dirtyhelper_SxDT,
15945 mkIRExprVec_2( mkexpr(addr),
15946 mkU32(gregOfRM(modrm)) )
15947 );
15948 /* declare we're writing memory */
15949 d->mFx = Ifx_Write;
15950 d->mAddr = mkexpr(addr);
15951 d->mSize = 6;
15952 break;
15953 case 2: DIP("lgdt %s\n", dis_buf);
15954 case 3: DIP("lidt %s\n", dis_buf);
15955 d = unsafeIRDirty_0_N (
15956 0/*regparms*/,
15957 "x86g_dirtyhelper_LGDT_LIDT",
15958 &x86g_dirtyhelper_LGDT_LIDT,
15959 mkIRExprVec_2( mkexpr(addr),
15960 mkU32(gregOfRM(modrm)) )
15961 );
15962 /* declare we're reading memory */
15963 d->mFx = Ifx_Read;
15964 d->mAddr = mkexpr(addr);
15965 d->mSize = 6;
15966 break;
15967 default: vassert(0); /*NOTREACHED*/
15968 }
15969
15970 vassert(d);
15971
15972 stmt( IRStmt_Dirty(d) );
15973 break;
15974 }
15975
15976 case 0x05: /* AMD's syscall */
15977 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
15978 mkU32(guest_EIP_curr_instr) ) );
15979 jmp_lit(&dres, Ijk_Sys_syscall, ((Addr32)guest_EIP_bbstart)+delta);
15980 vassert(dres.whatNext == Dis_StopHere);
15981 DIP("syscall\n");
15982 break;
15983
15984 /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */
15985
15986 default:
15987 goto decode_failure;
15988 } /* switch (opc) for the 2-byte opcodes */
15989 goto decode_success;
15990 } /* case 0x0F: of primary opcode */
15991
15992 /* ------------------------ ??? ------------------------ */
15993
15994 default:
15995 decode_failure:
15996 /* All decode failures end up here. */
15997 if (sigill_diag) {
15998 vex_printf("vex x86->IR: unhandled instruction bytes: "
15999 "0x%x 0x%x 0x%x 0x%x\n",
16000 getIByte(delta_start+0),
16001 getIByte(delta_start+1),
16002 getIByte(delta_start+2),
16003 getIByte(delta_start+3));
16004 }
16005
16006 /* Tell the dispatcher that this insn cannot be decoded, and so has
16007 not been executed, and (is currently) the next to be executed.
16008 EIP should be up-to-date since it made so at the start of each
16009 insn, but nevertheless be paranoid and update it again right
16010 now. */
16011 stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) );
16012 jmp_lit(&dres, Ijk_NoDecode, guest_EIP_curr_instr);
16013 vassert(dres.whatNext == Dis_StopHere);
16014 dres.len = 0;
16015 /* We also need to say that a CAS is not expected now, regardless
16016 of what it might have been set to at the start of the function,
16017 since the IR that we've emitted just above (to synthesis a
16018 SIGILL) does not involve any CAS, and presumably no other IR has
16019 been emitted for this (non-decoded) insn. */
16020 *expect_CAS = False;
16021 return dres;
16022
16023 } /* switch (opc) for the main (primary) opcode switch. */
16024
16025 decode_success:
16026 /* All decode successes end up here. */
16027 switch (dres.whatNext) {
16028 case Dis_Continue:
16029 stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_bbstart + delta) ) );
16030 break;
16031 case Dis_ResteerU:
16032 case Dis_ResteerC:
16033 stmt( IRStmt_Put( OFFB_EIP, mkU32(dres.continueAt) ) );
16034 break;
16035 case Dis_StopHere:
16036 break;
16037 default:
16038 vassert(0);
16039 }
16040
16041 DIP("\n");
16042 dres.len = delta - delta_start;
16043 return dres;
16044 }
16045
16046 #undef DIP
16047 #undef DIS
16048
16049
16050 /*------------------------------------------------------------*/
16051 /*--- Top-level fn ---*/
16052 /*------------------------------------------------------------*/
16053
16054 /* Disassemble a single instruction into IR. The instruction
16055 is located in host memory at &guest_code[delta]. */
16056
disInstr_X86(IRSB * irsb_IN,Bool (* resteerOkFn)(void *,Addr),Bool resteerCisOk,void * callback_opaque,const UChar * guest_code_IN,Long delta,Addr guest_IP,VexArch guest_arch,const VexArchInfo * archinfo,const VexAbiInfo * abiinfo,VexEndness host_endness_IN,Bool sigill_diag_IN)16057 DisResult disInstr_X86 ( IRSB* irsb_IN,
16058 Bool (*resteerOkFn) ( void*, Addr ),
16059 Bool resteerCisOk,
16060 void* callback_opaque,
16061 const UChar* guest_code_IN,
16062 Long delta,
16063 Addr guest_IP,
16064 VexArch guest_arch,
16065 const VexArchInfo* archinfo,
16066 const VexAbiInfo* abiinfo,
16067 VexEndness host_endness_IN,
16068 Bool sigill_diag_IN )
16069 {
16070 Int i, x1, x2;
16071 Bool expect_CAS, has_CAS;
16072 DisResult dres;
16073
16074 /* Set globals (see top of this file) */
16075 vassert(guest_arch == VexArchX86);
16076 guest_code = guest_code_IN;
16077 irsb = irsb_IN;
16078 host_endness = host_endness_IN;
16079 guest_EIP_curr_instr = (Addr32)guest_IP;
16080 guest_EIP_bbstart = (Addr32)toUInt(guest_IP - delta);
16081
16082 x1 = irsb_IN->stmts_used;
16083 expect_CAS = False;
16084 dres = disInstr_X86_WRK ( &expect_CAS, resteerOkFn,
16085 resteerCisOk,
16086 callback_opaque,
16087 delta, archinfo, abiinfo, sigill_diag_IN );
16088 x2 = irsb_IN->stmts_used;
16089 vassert(x2 >= x1);
16090
16091 /* See comment at the top of disInstr_X86_WRK for meaning of
16092 expect_CAS. Here, we (sanity-)check for the presence/absence of
16093 IRCAS as directed by the returned expect_CAS value. */
16094 has_CAS = False;
16095 for (i = x1; i < x2; i++) {
16096 if (irsb_IN->stmts[i]->tag == Ist_CAS)
16097 has_CAS = True;
16098 }
16099
16100 if (expect_CAS != has_CAS) {
16101 /* inconsistency detected. re-disassemble the instruction so as
16102 to generate a useful error message; then assert. */
16103 vex_traceflags |= VEX_TRACE_FE;
16104 dres = disInstr_X86_WRK ( &expect_CAS, resteerOkFn,
16105 resteerCisOk,
16106 callback_opaque,
16107 delta, archinfo, abiinfo, sigill_diag_IN );
16108 for (i = x1; i < x2; i++) {
16109 vex_printf("\t\t");
16110 ppIRStmt(irsb_IN->stmts[i]);
16111 vex_printf("\n");
16112 }
16113 /* Failure of this assertion is serious and denotes a bug in
16114 disInstr. */
16115 vpanic("disInstr_X86: inconsistency in LOCK prefix handling");
16116 }
16117
16118 return dres;
16119 }
16120
16121
16122 /*--------------------------------------------------------------------*/
16123 /*--- end guest_x86_toIR.c ---*/
16124 /*--------------------------------------------------------------------*/
16125