1
2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_x86_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 /* Translates x86 code to IR. */
37
38 /* TODO:
39
40 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
41 to ensure a 32-bit value is being written.
42
43 FUCOMI(P): what happens to A and S flags? Currently are forced
44 to zero.
45
46 x87 FP Limitations:
47
48 * all arithmetic done at 64 bits
49
50 * no FP exceptions, except for handling stack over/underflow
51
52 * FP rounding mode observed only for float->int conversions
53 and int->float conversions which could lose accuracy, and
54 for float-to-float rounding. For all other operations,
55 round-to-nearest is used, regardless.
56
57 * some of the FCOM cases could do with testing -- not convinced
58 that the args are the right way round.
59
60 * FSAVE does not re-initialise the FPU; it should do
61
62 * FINIT not only initialises the FPU environment, it also
63 zeroes all the FP registers. It should leave the registers
64 unchanged.
65
66 SAHF should cause eflags[1] == 1, and in fact it produces 0. As
67 per Intel docs this bit has no meaning anyway. Since PUSHF is the
68 only way to observe eflags[1], a proper fix would be to make that
69 bit be set by PUSHF.
70
71 The state of %eflags.AC (alignment check, bit 18) is recorded by
72 the simulation (viz, if you set it with popf then a pushf produces
73 the value you set it to), but it is otherwise ignored. In
74 particular, setting it to 1 does NOT cause alignment checking to
75 happen. Programs that set it to 1 and then rely on the resulting
76 SIGBUSs to inform them of misaligned accesses will not work.
77
78 Implementation of sysenter is necessarily partial. sysenter is a
79 kind of system call entry. When doing a sysenter, the return
80 address is not known -- that is something that is beyond Vex's
81 knowledge. So the generated IR forces a return to the scheduler,
82 which can do what it likes to simulate the sysenter, but it MUST
83 set this thread's guest_EIP field with the continuation address
84 before resuming execution. If that doesn't happen, the thread will
85 jump to address zero, which is probably fatal.
86
87 This module uses global variables and so is not MT-safe (if that
88 should ever become relevant).
89
90 The delta values are 32-bit ints, not 64-bit ints. That means
91 this module may not work right if run on a 64-bit host. That should
92 be fixed properly, really -- if anyone ever wants to use Vex to
93 translate x86 code for execution on a 64-bit host.
94
95 casLE (implementation of lock-prefixed insns) and rep-prefixed
96 insns: the side-exit back to the start of the insn is done with
97 Ijk_Boring. This is quite wrong, it should be done with
98 Ijk_NoRedir, since otherwise the side exit, which is intended to
99 restart the instruction for whatever reason, could go somewhere
100 entirely else. Doing it right (with Ijk_NoRedir jumps) would make
101 no-redir jumps performance critical, at least for rep-prefixed
102 instructions, since all iterations thereof would involve such a
103 jump. It's not such a big deal with casLE since the side exit is
104 only taken if the CAS fails, that is, the location is contended,
105 which is relatively unlikely.
106
107 XXXX: Nov 2009: handling of SWP on ARM suffers from the same
108 problem.
109
110 Note also, the test for CAS success vs failure is done using
111 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
112 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
113 shouldn't definedness-check these comparisons. See
114 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
115 background/rationale.
116 */
117
118 /* Performance holes:
119
120 - fcom ; fstsw %ax ; sahf
121 sahf does not update the O flag (sigh) and so O needs to
122 be computed. This is done expensively; it would be better
123 to have a calculate_eflags_o helper.
124
125 - emwarns; some FP codes can generate huge numbers of these
126 if the fpucw is changed in an inner loop. It would be
127 better for the guest state to have an emwarn-enable reg
128 which can be set zero or nonzero. If it is zero, emwarns
129 are not flagged, and instead control just flows all the
130 way through bbs as usual.
131 */
132
133 /* "Special" instructions.
134
135 This instruction decoder can decode four special instructions
136 which mean nothing natively (are no-ops as far as regs/mem are
137 concerned) but have meaning for supporting Valgrind. A special
138 instruction is flagged by the 12-byte preamble C1C703 C1C70D C1C71D
139 C1C713 (in the standard interpretation, that means: roll $3, %edi;
140 roll $13, %edi; roll $29, %edi; roll $19, %edi). Following that,
141 one of the following 4 is allowed (standard interpretation in
142 parentheses):
143
144 87DB (xchgl %ebx,%ebx) %EDX = client_request ( %EAX )
145 87C9 (xchgl %ecx,%ecx) %EAX = guest_NRADDR
146 87D2 (xchgl %edx,%edx) call-noredir *%EAX
147 87FF (xchgl %edi,%edi) IR injection
148
149 Any other bytes following the 12-byte preamble are illegal and
150 constitute a failure in instruction decoding. This all assumes
151 that the preamble will never occur except in specific code
152 fragments designed for Valgrind to catch.
153
154 No prefixes may precede a "Special" instruction.
155 */
156
157 /* LOCK prefixed instructions. These are translated using IR-level
158 CAS statements (IRCAS) and are believed to preserve atomicity, even
159 from the point of view of some other process racing against a
160 simulated one (presumably they communicate via a shared memory
161 segment).
162
163 Handlers which are aware of LOCK prefixes are:
164 dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
165 dis_cmpxchg_G_E (cmpxchg)
166 dis_Grp1 (add, or, adc, sbb, and, sub, xor)
167 dis_Grp3 (not, neg)
168 dis_Grp4 (inc, dec)
169 dis_Grp5 (inc, dec)
170 dis_Grp8_Imm (bts, btc, btr)
171 dis_bt_G_E (bts, btc, btr)
172 dis_xadd_G_E (xadd)
173 */
174
175
176 #include "libvex_basictypes.h"
177 #include "libvex_ir.h"
178 #include "libvex.h"
179 #include "libvex_guest_x86.h"
180
181 #include "main_util.h"
182 #include "main_globals.h"
183 #include "guest_generic_bb_to_IR.h"
184 #include "guest_generic_x87.h"
185 #include "guest_x86_defs.h"
186
187
188 /*------------------------------------------------------------*/
189 /*--- Globals ---*/
190 /*------------------------------------------------------------*/
191
192 /* These are set at the start of the translation of an insn, right
193 down in disInstr_X86, so that we don't have to pass them around
194 endlessly. They are all constant during the translation of any
195 given insn. */
196
197 /* We need to know this to do sub-register accesses correctly. */
198 static VexEndness host_endness;
199
200 /* Pointer to the guest code area (points to start of BB, not to the
201 insn being processed). */
202 static const UChar* guest_code;
203
204 /* The guest address corresponding to guest_code[0]. */
205 static Addr32 guest_EIP_bbstart;
206
207 /* The guest address for the instruction currently being
208 translated. */
209 static Addr32 guest_EIP_curr_instr;
210
211 /* The IRSB* into which we're generating code. */
212 static IRSB* irsb;
213
214
215 /*------------------------------------------------------------*/
216 /*--- Debugging output ---*/
217 /*------------------------------------------------------------*/
218
/* Print front-end trace output via vex_printf, but only when the
   VEX_TRACE_FE bit is set in vex_traceflags.

   The expansion is wrapped in do { } while (0) so that it behaves as
   exactly one statement.  With a bare 'if' in the expansion, a use
   such as
      if (c) DIP(...); else foo();
   would silently attach the 'else' to the macro's hidden 'if'.  As
   before, every use must be followed by a semicolon. */
#define DIP(format, args...)                 \
   do {                                      \
      if (vex_traceflags & VEX_TRACE_FE)     \
         vex_printf(format, ## args);        \
   } while (0)
222
/* Format front-end trace text into 'buf' via vex_sprintf, but only
   when the VEX_TRACE_FE bit is set in vex_traceflags; otherwise 'buf'
   is left untouched.

   The expansion is wrapped in do { } while (0) so that it behaves as
   exactly one statement and cannot capture an 'else' that follows the
   macro invocation.  As before, every use must be followed by a
   semicolon. */
#define DIS(buf, format, args...)            \
   do {                                      \
      if (vex_traceflags & VEX_TRACE_FE)     \
         vex_sprintf(buf, format, ## args);  \
   } while (0)
226
227
228 /*------------------------------------------------------------*/
229 /*--- Offsets of various parts of the x86 guest state. ---*/
230 /*------------------------------------------------------------*/
231
/* Byte offset of a named field inside VexGuestX86State.  All the
   OFFB_* names below are defined in terms of this. */
#define X86_GST_OFFB(field)  offsetof(VexGuestX86State, field)

/* Integer registers. */
#define OFFB_EAX        X86_GST_OFFB(guest_EAX)
#define OFFB_EBX        X86_GST_OFFB(guest_EBX)
#define OFFB_ECX        X86_GST_OFFB(guest_ECX)
#define OFFB_EDX        X86_GST_OFFB(guest_EDX)
#define OFFB_ESP        X86_GST_OFFB(guest_ESP)
#define OFFB_EBP        X86_GST_OFFB(guest_EBP)
#define OFFB_ESI        X86_GST_OFFB(guest_ESI)
#define OFFB_EDI        X86_GST_OFFB(guest_EDI)

/* Instruction pointer. */
#define OFFB_EIP        X86_GST_OFFB(guest_EIP)

/* The four fields of the flags thunk. */
#define OFFB_CC_OP      X86_GST_OFFB(guest_CC_OP)
#define OFFB_CC_DEP1    X86_GST_OFFB(guest_CC_DEP1)
#define OFFB_CC_DEP2    X86_GST_OFFB(guest_CC_DEP2)
#define OFFB_CC_NDEP    X86_GST_OFFB(guest_CC_NDEP)

/* FP register/tag arrays (offset of element 0) and assorted
   flag and FP state fields. */
#define OFFB_FPREGS     X86_GST_OFFB(guest_FPREG[0])
#define OFFB_FPTAGS     X86_GST_OFFB(guest_FPTAG[0])
#define OFFB_DFLAG      X86_GST_OFFB(guest_DFLAG)
#define OFFB_IDFLAG     X86_GST_OFFB(guest_IDFLAG)
#define OFFB_ACFLAG     X86_GST_OFFB(guest_ACFLAG)
#define OFFB_FTOP       X86_GST_OFFB(guest_FTOP)
#define OFFB_FC3210     X86_GST_OFFB(guest_FC3210)
#define OFFB_FPROUND    X86_GST_OFFB(guest_FPROUND)

/* Segment registers and descriptor table fields. */
#define OFFB_CS         X86_GST_OFFB(guest_CS)
#define OFFB_DS         X86_GST_OFFB(guest_DS)
#define OFFB_ES         X86_GST_OFFB(guest_ES)
#define OFFB_FS         X86_GST_OFFB(guest_FS)
#define OFFB_GS         X86_GST_OFFB(guest_GS)
#define OFFB_SS         X86_GST_OFFB(guest_SS)
#define OFFB_LDT        X86_GST_OFFB(guest_LDT)
#define OFFB_GDT        X86_GST_OFFB(guest_GDT)

/* SSE rounding field and the eight XMM registers. */
#define OFFB_SSEROUND   X86_GST_OFFB(guest_SSEROUND)
#define OFFB_XMM0       X86_GST_OFFB(guest_XMM0)
#define OFFB_XMM1       X86_GST_OFFB(guest_XMM1)
#define OFFB_XMM2       X86_GST_OFFB(guest_XMM2)
#define OFFB_XMM3       X86_GST_OFFB(guest_XMM3)
#define OFFB_XMM4       X86_GST_OFFB(guest_XMM4)
#define OFFB_XMM5       X86_GST_OFFB(guest_XMM5)
#define OFFB_XMM6       X86_GST_OFFB(guest_XMM6)
#define OFFB_XMM7       X86_GST_OFFB(guest_XMM7)

/* Emulation note. */
#define OFFB_EMNOTE     X86_GST_OFFB(guest_EMNOTE)

/* CMSTART / CMLEN / NRADDR fields. */
#define OFFB_CMSTART    X86_GST_OFFB(guest_CMSTART)
#define OFFB_CMLEN      X86_GST_OFFB(guest_CMLEN)
#define OFFB_NRADDR     X86_GST_OFFB(guest_NRADDR)

/* IP_AT_SYSCALL field. */
#define OFFB_IP_AT_SYSCALL  X86_GST_OFFB(guest_IP_AT_SYSCALL)
283
284
285 /*------------------------------------------------------------*/
286 /*--- Helper bits and pieces for deconstructing the ---*/
287 /*--- x86 insn stream. ---*/
288 /*------------------------------------------------------------*/
289
/* Intel register encoding for the integer registers. */
enum {
   R_EAX = 0,
   R_ECX = 1,
   R_EDX = 2,
   R_EBX = 3,
   R_ESP = 4,
   R_EBP = 5,
   R_ESI = 6,
   R_EDI = 7
};

/* Byte-register numbers expressed relative to R_EAX: AL has the same
   number as EAX, AH is four above it. */
enum {
   R_AL = 0 + R_EAX,
   R_AH = 4 + R_EAX
};
302
/* Intel register encoding for the segment registers. */
enum {
   R_ES = 0,
   R_CS = 1,
   R_SS = 2,
   R_DS = 3,
   R_FS = 4,
   R_GS = 5
};
310
311
312 /* Add a statement to the list held by "irbb". */
stmt(IRStmt * st)313 static void stmt ( IRStmt* st )
314 {
315 addStmtToIRSB( irsb, st );
316 }
317
318 /* Generate a new temporary of the given type. */
newTemp(IRType ty)319 static IRTemp newTemp ( IRType ty )
320 {
321 vassert(isPlausibleIRType(ty));
322 return newIRTemp( irsb->tyenv, ty );
323 }
324
325 /* Various simple conversions */
326
/* Sign-extend the low 8 bits of 'x' to 32 bits; any higher bits of
   'x' are ignored.

   Done entirely in unsigned arithmetic: flipping bit 7 and then
   subtracting its weight propagates the sign upwards using only
   well-defined modular wrap-around.  The previous form, a shift left
   followed by an arithmetic shift right through Int, depended on two
   implementation-defined behaviours (conversion of an out-of-range
   value to a signed type, and right shift of a negative value).
   Like that form, this assumes UInt is 32 bits wide. */
static UInt extend_s_8to32( UInt x )
{
   return ((x & 0xFFu) ^ 0x80u) - 0x80u;
}
331
/* Sign-extend the low 16 bits of 'x' to 32 bits; any higher bits of
   'x' are ignored.

   Done entirely in unsigned arithmetic: flipping bit 15 and then
   subtracting its weight propagates the sign upwards using only
   well-defined modular wrap-around.  The previous form, a shift left
   followed by an arithmetic shift right through Int, depended on two
   implementation-defined behaviours (conversion of an out-of-range
   value to a signed type, and right shift of a negative value).
   Like that form, this assumes UInt is 32 bits wide. */
static UInt extend_s_16to32 ( UInt x )
{
   return ((x & 0xFFFFu) ^ 0x8000u) - 0x8000u;
}
336
337 /* Fetch a byte from the guest insn stream. */
getIByte(Int delta)338 static UChar getIByte ( Int delta )
339 {
340 return guest_code[delta];
341 }
342
343 /* Extract the reg field from a modRM byte. */
gregOfRM(UChar mod_reg_rm)344 static Int gregOfRM ( UChar mod_reg_rm )
345 {
346 return (Int)( (mod_reg_rm >> 3) & 7 );
347 }
348
349 /* Figure out whether the mod and rm parts of a modRM byte refer to a
350 register or memory. If so, the byte will have the form 11XXXYYY,
351 where YYY is the register number. */
epartIsReg(UChar mod_reg_rm)352 static Bool epartIsReg ( UChar mod_reg_rm )
353 {
354 return toBool(0xC0 == (mod_reg_rm & 0xC0));
355 }
356
357 /* ... and extract the register number ... */
eregOfRM(UChar mod_reg_rm)358 static Int eregOfRM ( UChar mod_reg_rm )
359 {
360 return (Int)(mod_reg_rm & 0x7);
361 }
362
363 /* Get an 8/16/32-bit unsigned value out of the insn stream. */
364
getUChar(Int delta)365 static UChar getUChar ( Int delta )
366 {
367 UChar v = guest_code[delta+0];
368 return toUChar(v);
369 }
370
getUDisp16(Int delta)371 static UInt getUDisp16 ( Int delta )
372 {
373 UInt v = guest_code[delta+1]; v <<= 8;
374 v |= guest_code[delta+0];
375 return v & 0xFFFF;
376 }
377
getUDisp32(Int delta)378 static UInt getUDisp32 ( Int delta )
379 {
380 UInt v = guest_code[delta+3]; v <<= 8;
381 v |= guest_code[delta+2]; v <<= 8;
382 v |= guest_code[delta+1]; v <<= 8;
383 v |= guest_code[delta+0];
384 return v;
385 }
386
getUDisp(Int size,Int delta)387 static UInt getUDisp ( Int size, Int delta )
388 {
389 switch (size) {
390 case 4: return getUDisp32(delta);
391 case 2: return getUDisp16(delta);
392 case 1: return (UInt)getUChar(delta);
393 default: vpanic("getUDisp(x86)");
394 }
395 return 0; /*notreached*/
396 }
397
398
399 /* Get a byte value out of the insn stream and sign-extend to 32
400 bits. */
getSDisp8(Int delta)401 static UInt getSDisp8 ( Int delta )
402 {
403 return extend_s_8to32( (UInt) (guest_code[delta]) );
404 }
405
getSDisp16(Int delta0)406 static UInt getSDisp16 ( Int delta0 )
407 {
408 const UChar* eip = &guest_code[delta0];
409 UInt d = *eip++;
410 d |= ((*eip++) << 8);
411 return extend_s_16to32(d);
412 }
413
getSDisp(Int size,Int delta)414 static UInt getSDisp ( Int size, Int delta )
415 {
416 switch (size) {
417 case 4: return getUDisp32(delta);
418 case 2: return getSDisp16(delta);
419 case 1: return getSDisp8(delta);
420 default: vpanic("getSDisp(x86)");
421 }
422 return 0; /*notreached*/
423 }
424
425
426 /*------------------------------------------------------------*/
427 /*--- Helpers for constructing IR. ---*/
428 /*------------------------------------------------------------*/
429
430 /* Create a 1/2/4 byte read of an x86 integer register. For 16/8 bit
431 register references, we need to take the host endianness into
432 account. Supplied value is 0 .. 7 and in the Intel instruction
433 encoding. */
434
szToITy(Int n)435 static IRType szToITy ( Int n )
436 {
437 switch (n) {
438 case 1: return Ity_I8;
439 case 2: return Ity_I16;
440 case 4: return Ity_I32;
441 default: vpanic("szToITy(x86)");
442 }
443 }
444
445 /* On a little-endian host, less significant bits of the guest
446 registers are at lower addresses. Therefore, if a reference to a
447 register low half has the safe guest state offset as a reference to
448 the full register.
449 */
integerGuestRegOffset(Int sz,UInt archreg)450 static Int integerGuestRegOffset ( Int sz, UInt archreg )
451 {
452 vassert(archreg < 8);
453
454 /* Correct for little-endian host only. */
455 vassert(host_endness == VexEndnessLE);
456
457 if (sz == 4 || sz == 2 || (sz == 1 && archreg < 4)) {
458 switch (archreg) {
459 case R_EAX: return OFFB_EAX;
460 case R_EBX: return OFFB_EBX;
461 case R_ECX: return OFFB_ECX;
462 case R_EDX: return OFFB_EDX;
463 case R_ESI: return OFFB_ESI;
464 case R_EDI: return OFFB_EDI;
465 case R_ESP: return OFFB_ESP;
466 case R_EBP: return OFFB_EBP;
467 default: vpanic("integerGuestRegOffset(x86,le)(4,2)");
468 }
469 }
470
471 vassert(archreg >= 4 && archreg < 8 && sz == 1);
472 switch (archreg-4) {
473 case R_EAX: return 1+ OFFB_EAX;
474 case R_EBX: return 1+ OFFB_EBX;
475 case R_ECX: return 1+ OFFB_ECX;
476 case R_EDX: return 1+ OFFB_EDX;
477 default: vpanic("integerGuestRegOffset(x86,le)(1h)");
478 }
479
480 /* NOTREACHED */
481 vpanic("integerGuestRegOffset(x86,le)");
482 }
483
segmentGuestRegOffset(UInt sreg)484 static Int segmentGuestRegOffset ( UInt sreg )
485 {
486 switch (sreg) {
487 case R_ES: return OFFB_ES;
488 case R_CS: return OFFB_CS;
489 case R_SS: return OFFB_SS;
490 case R_DS: return OFFB_DS;
491 case R_FS: return OFFB_FS;
492 case R_GS: return OFFB_GS;
493 default: vpanic("segmentGuestRegOffset(x86)");
494 }
495 }
496
xmmGuestRegOffset(UInt xmmreg)497 static Int xmmGuestRegOffset ( UInt xmmreg )
498 {
499 switch (xmmreg) {
500 case 0: return OFFB_XMM0;
501 case 1: return OFFB_XMM1;
502 case 2: return OFFB_XMM2;
503 case 3: return OFFB_XMM3;
504 case 4: return OFFB_XMM4;
505 case 5: return OFFB_XMM5;
506 case 6: return OFFB_XMM6;
507 case 7: return OFFB_XMM7;
508 default: vpanic("xmmGuestRegOffset");
509 }
510 }
511
512 /* Lanes of vector registers are always numbered from zero being the
513 least significant lane (rightmost in the register). */
514
xmmGuestRegLane16offset(UInt xmmreg,Int laneno)515 static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
516 {
517 /* Correct for little-endian host only. */
518 vassert(host_endness == VexEndnessLE);
519 vassert(laneno >= 0 && laneno < 8);
520 return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
521 }
522
xmmGuestRegLane32offset(UInt xmmreg,Int laneno)523 static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
524 {
525 /* Correct for little-endian host only. */
526 vassert(host_endness == VexEndnessLE);
527 vassert(laneno >= 0 && laneno < 4);
528 return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
529 }
530
xmmGuestRegLane64offset(UInt xmmreg,Int laneno)531 static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
532 {
533 /* Correct for little-endian host only. */
534 vassert(host_endness == VexEndnessLE);
535 vassert(laneno >= 0 && laneno < 2);
536 return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
537 }
538
getIReg(Int sz,UInt archreg)539 static IRExpr* getIReg ( Int sz, UInt archreg )
540 {
541 vassert(sz == 1 || sz == 2 || sz == 4);
542 vassert(archreg < 8);
543 return IRExpr_Get( integerGuestRegOffset(sz,archreg),
544 szToITy(sz) );
545 }
546
547 /* Ditto, but write to a reg instead. */
putIReg(Int sz,UInt archreg,IRExpr * e)548 static void putIReg ( Int sz, UInt archreg, IRExpr* e )
549 {
550 IRType ty = typeOfIRExpr(irsb->tyenv, e);
551 switch (sz) {
552 case 1: vassert(ty == Ity_I8); break;
553 case 2: vassert(ty == Ity_I16); break;
554 case 4: vassert(ty == Ity_I32); break;
555 default: vpanic("putIReg(x86)");
556 }
557 vassert(archreg < 8);
558 stmt( IRStmt_Put(integerGuestRegOffset(sz,archreg), e) );
559 }
560
getSReg(UInt sreg)561 static IRExpr* getSReg ( UInt sreg )
562 {
563 return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 );
564 }
565
/* Emit a statement writing the 16-bit expression 'e' to segment
   register 'sreg' (R_ES .. R_GS encoding).  Asserts that 'e' has
   type Ity_I16. */
static void putSReg ( UInt sreg, IRExpr* e )
{
   Int off;
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   off = segmentGuestRegOffset(sreg);
   stmt( IRStmt_Put(off, e) );
}
571
getXMMReg(UInt xmmreg)572 static IRExpr* getXMMReg ( UInt xmmreg )
573 {
574 return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
575 }
576
getXMMRegLane64(UInt xmmreg,Int laneno)577 static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
578 {
579 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
580 }
581
getXMMRegLane64F(UInt xmmreg,Int laneno)582 static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
583 {
584 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
585 }
586
getXMMRegLane32(UInt xmmreg,Int laneno)587 static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
588 {
589 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
590 }
591
getXMMRegLane32F(UInt xmmreg,Int laneno)592 static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
593 {
594 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
595 }
596
/* Emit a statement writing the 128-bit vector expression 'e' to the
   whole of XMM register 'xmmreg'.  Asserts that 'e' has type
   Ity_V128. */
static void putXMMReg ( UInt xmmreg, IRExpr* e )
{
   Int off;
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   off = xmmGuestRegOffset(xmmreg);
   stmt( IRStmt_Put(off, e) );
}
602
/* Emit a statement writing the Ity_I64 expression 'e' to 64-bit lane
   'laneno' of XMM register 'xmmreg'.  The type of 'e' is asserted. */
static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   Int off;
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   off = xmmGuestRegLane64offset(xmmreg, laneno);
   stmt( IRStmt_Put(off, e) );
}
608
/* Emit a statement writing the Ity_F64 expression 'e' to 64-bit lane
   'laneno' of XMM register 'xmmreg'.  The type of 'e' is asserted. */
static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   Int off;
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   off = xmmGuestRegLane64offset(xmmreg, laneno);
   stmt( IRStmt_Put(off, e) );
}
614
/* Emit a statement writing the Ity_F32 expression 'e' to 32-bit lane
   'laneno' of XMM register 'xmmreg'.  The type of 'e' is asserted. */
static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   Int off;
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   off = xmmGuestRegLane32offset(xmmreg, laneno);
   stmt( IRStmt_Put(off, e) );
}
620
/* Emit a statement writing the Ity_I32 expression 'e' to 32-bit lane
   'laneno' of XMM register 'xmmreg'.  The type of 'e' is asserted. */
static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   Int off;
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   off = xmmGuestRegLane32offset(xmmreg, laneno);
   stmt( IRStmt_Put(off, e) );
}
626
/* Emit a statement writing the Ity_I16 expression 'e' to 16-bit lane
   'laneno' of XMM register 'xmmreg'.  The type of 'e' is asserted. */
static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   Int off;
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   off = xmmGuestRegLane16offset(xmmreg, laneno);
   stmt( IRStmt_Put(off, e) );
}
632
/* Emit a statement that writes the value of expression 'e' to
   temporary 'dst'. */
static void assign ( IRTemp dst, IRExpr* e )
{
   IRStmt* wr = IRStmt_WrTmp(dst, e);
   stmt(wr);
}
637
/* Emit a little-endian store of 'data' to the address given by
   'addr'. */
static void storeLE ( IRExpr* addr, IRExpr* data )
{
   IRStmt* st = IRStmt_Store(Iend_LE, addr, data);
   stmt(st);
}
642
/* Shorthand for building a unary-operation expression: 'op' applied
   to 'a'. */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   IRExpr* e = IRExpr_Unop(op, a);
   return e;
}
647
/* Shorthand for building a binary-operation expression: 'op' applied
   to 'a1' and 'a2'. */
static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   IRExpr* e = IRExpr_Binop(op, a1, a2);
   return e;
}
652
/* Shorthand for building a ternary-operation expression: 'op'
   applied to 'a1', 'a2' and 'a3'. */
static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   IRExpr* e = IRExpr_Triop(op, a1, a2, a3);
   return e;
}
657
mkexpr(IRTemp tmp)658 static IRExpr* mkexpr ( IRTemp tmp )
659 {
660 return IRExpr_RdTmp(tmp);
661 }
662
mkU8(UInt i)663 static IRExpr* mkU8 ( UInt i )
664 {
665 vassert(i < 256);
666 return IRExpr_Const(IRConst_U8( (UChar)i ));
667 }
668
mkU16(UInt i)669 static IRExpr* mkU16 ( UInt i )
670 {
671 vassert(i < 65536);
672 return IRExpr_Const(IRConst_U16( (UShort)i ));
673 }
674
mkU32(UInt i)675 static IRExpr* mkU32 ( UInt i )
676 {
677 return IRExpr_Const(IRConst_U32(i));
678 }
679
mkU64(ULong i)680 static IRExpr* mkU64 ( ULong i )
681 {
682 return IRExpr_Const(IRConst_U64(i));
683 }
684
mkU(IRType ty,UInt i)685 static IRExpr* mkU ( IRType ty, UInt i )
686 {
687 if (ty == Ity_I8) return mkU8(i);
688 if (ty == Ity_I16) return mkU16(i);
689 if (ty == Ity_I32) return mkU32(i);
690 /* If this panics, it usually means you passed a size (1,2,4)
691 value as the IRType, rather than a real IRType. */
692 vpanic("mkU(x86)");
693 }
694
mkV128(UShort mask)695 static IRExpr* mkV128 ( UShort mask )
696 {
697 return IRExpr_Const(IRConst_V128(mask));
698 }
699
/* Build an expression for a little-endian load of a value of type
   'ty' from the address given by 'addr'. */
static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   IRExpr* ld = IRExpr_Load(Iend_LE, ty, addr);
   return ld;
}
704
/* Given the 8-bit form 'op8' of one of the operations asserted
   below, return the variant for operand type 'ty' (Ity_I8, Ity_I16
   or Ity_I32).  The result is op8 plus 0, 1 or 2 respectively.
   NOTE(review): this presumes the 8/16/32-bit variants of each
   operation are numbered consecutively in IROp, which is declared
   elsewhere -- confirm there. */
static IROp mkSizedOp ( IRType ty, IROp op8 )
{
   Int adj;
   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
   vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
           || op8 == Iop_Mul8
           || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
           || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
           || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
           || op8 == Iop_CasCmpNE8
           || op8 == Iop_ExpCmpNE8
           || op8 == Iop_Not8);
   if (ty == Ity_I8)
      adj = 0;
   else if (ty == Ity_I16)
      adj = 1;
   else
      adj = 2;
   return adj + op8;
}
720
mkWidenOp(Int szSmall,Int szBig,Bool signd)721 static IROp mkWidenOp ( Int szSmall, Int szBig, Bool signd )
722 {
723 if (szSmall == 1 && szBig == 4) {
724 return signd ? Iop_8Sto32 : Iop_8Uto32;
725 }
726 if (szSmall == 1 && szBig == 2) {
727 return signd ? Iop_8Sto16 : Iop_8Uto16;
728 }
729 if (szSmall == 2 && szBig == 4) {
730 return signd ? Iop_16Sto32 : Iop_16Uto32;
731 }
732 vpanic("mkWidenOp(x86,guest)");
733 }
734
/* Build the logical AND of two 1-bit expressions.  Both operands are
   asserted to have type Ity_I1.  The AND is done by widening each
   operand to 32 bits, combining with Iop_And32 and narrowing the
   result back with Iop_32to1. */
static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
{
   IRExpr* x32;
   IRExpr* y32;
   vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
   vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
   x32 = unop(Iop_1Uto32, x);
   y32 = unop(Iop_1Uto32, y);
   return unop(Iop_32to1, binop(Iop_And32, x32, y32));
}
744
745 /* Generate a compare-and-swap operation, operating on memory at
746 'addr'. The expected value is 'expVal' and the new value is
747 'newVal'. If the operation fails, then transfer control (with a
748 no-redir jump (XXX no -- see comment at top of this file)) to
749 'restart_point', which is presumably the address of the guest
750 instruction again -- retrying, essentially. */
casLE(IRExpr * addr,IRExpr * expVal,IRExpr * newVal,Addr32 restart_point)751 static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
752 Addr32 restart_point )
753 {
754 IRCAS* cas;
755 IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
756 IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
757 IRTemp oldTmp = newTemp(tyE);
758 IRTemp expTmp = newTemp(tyE);
759 vassert(tyE == tyN);
760 vassert(tyE == Ity_I32 || tyE == Ity_I16 || tyE == Ity_I8);
761 assign(expTmp, expVal);
762 cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
763 NULL, mkexpr(expTmp), NULL, newVal );
764 stmt( IRStmt_CAS(cas) );
765 stmt( IRStmt_Exit(
766 binop( mkSizedOp(tyE,Iop_CasCmpNE8),
767 mkexpr(oldTmp), mkexpr(expTmp) ),
768 Ijk_Boring, /*Ijk_NoRedir*/
769 IRConst_U32( restart_point ),
770 OFFB_EIP
771 ));
772 }
773
774
775 /*------------------------------------------------------------*/
776 /*--- Helpers for %eflags. ---*/
777 /*------------------------------------------------------------*/
778
779 /* -------------- Evaluating the flags-thunk. -------------- */
780
781 /* Build IR to calculate all the eflags from stored
782 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
783 Ity_I32. */
mk_x86g_calculate_eflags_all(void)784 static IRExpr* mk_x86g_calculate_eflags_all ( void )
785 {
786 IRExpr** args
787 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
788 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
789 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
790 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
791 IRExpr* call
792 = mkIRExprCCall(
793 Ity_I32,
794 0/*regparm*/,
795 "x86g_calculate_eflags_all", &x86g_calculate_eflags_all,
796 args
797 );
798 /* Exclude OP and NDEP from definedness checking. We're only
799 interested in DEP1 and DEP2. */
800 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
801 return call;
802 }
803
804 /* Build IR to calculate some particular condition from stored
805 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
806 Ity_Bit. */
mk_x86g_calculate_condition(X86Condcode cond)807 static IRExpr* mk_x86g_calculate_condition ( X86Condcode cond )
808 {
809 IRExpr** args
810 = mkIRExprVec_5( mkU32(cond),
811 IRExpr_Get(OFFB_CC_OP, Ity_I32),
812 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
813 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
814 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
815 IRExpr* call
816 = mkIRExprCCall(
817 Ity_I32,
818 0/*regparm*/,
819 "x86g_calculate_condition", &x86g_calculate_condition,
820 args
821 );
822 /* Exclude the requested condition, OP and NDEP from definedness
823 checking. We're only interested in DEP1 and DEP2. */
824 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
825 return unop(Iop_32to1, call);
826 }
827
828 /* Build IR to calculate just the carry flag from stored
829 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I32. */
mk_x86g_calculate_eflags_c(void)830 static IRExpr* mk_x86g_calculate_eflags_c ( void )
831 {
832 IRExpr** args
833 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
834 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
835 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
836 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
837 IRExpr* call
838 = mkIRExprCCall(
839 Ity_I32,
840 3/*regparm*/,
841 "x86g_calculate_eflags_c", &x86g_calculate_eflags_c,
842 args
843 );
844 /* Exclude OP and NDEP from definedness checking. We're only
845 interested in DEP1 and DEP2. */
846 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
847 return call;
848 }
849
850
851 /* -------------- Building the flags-thunk. -------------- */
852
853 /* The machinery in this section builds the flag-thunk following a
854 flag-setting operation. Hence the various setFlags_* functions.
855 */
856
isAddSub(IROp op8)857 static Bool isAddSub ( IROp op8 )
858 {
859 return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
860 }
861
isLogic(IROp op8)862 static Bool isLogic ( IROp op8 )
863 {
864 return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
865 }
866
867 /* U-widen 8/16/32 bit int expr to 32. */
widenUto32(IRExpr * e)868 static IRExpr* widenUto32 ( IRExpr* e )
869 {
870 switch (typeOfIRExpr(irsb->tyenv,e)) {
871 case Ity_I32: return e;
872 case Ity_I16: return unop(Iop_16Uto32,e);
873 case Ity_I8: return unop(Iop_8Uto32,e);
874 default: vpanic("widenUto32");
875 }
876 }
877
878 /* S-widen 8/16/32 bit int expr to 32. */
widenSto32(IRExpr * e)879 static IRExpr* widenSto32 ( IRExpr* e )
880 {
881 switch (typeOfIRExpr(irsb->tyenv,e)) {
882 case Ity_I32: return e;
883 case Ity_I16: return unop(Iop_16Sto32,e);
884 case Ity_I8: return unop(Iop_8Sto32,e);
885 default: vpanic("widenSto32");
886 }
887 }
888
889 /* Narrow 8/16/32 bit int expr to 8/16/32. Clearly only some
890 of these combinations make sense. */
narrowTo(IRType dst_ty,IRExpr * e)891 static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
892 {
893 IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
894 if (src_ty == dst_ty)
895 return e;
896 if (src_ty == Ity_I32 && dst_ty == Ity_I16)
897 return unop(Iop_32to16, e);
898 if (src_ty == Ity_I32 && dst_ty == Ity_I8)
899 return unop(Iop_32to8, e);
900
901 vex_printf("\nsrc, dst tys are: ");
902 ppIRType(src_ty);
903 vex_printf(", ");
904 ppIRType(dst_ty);
905 vex_printf("\n");
906 vpanic("narrowTo(x86)");
907 }
908
909
910 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
911 auto-sized up to the real op. */
912
913 static
setFlags_DEP1_DEP2(IROp op8,IRTemp dep1,IRTemp dep2,IRType ty)914 void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
915 {
916 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
917
918 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
919
920 switch (op8) {
921 case Iop_Add8: ccOp += X86G_CC_OP_ADDB; break;
922 case Iop_Sub8: ccOp += X86G_CC_OP_SUBB; break;
923 default: ppIROp(op8);
924 vpanic("setFlags_DEP1_DEP2(x86)");
925 }
926 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) );
927 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
928 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(dep2))) );
929 /* Set NDEP even though it isn't used. This makes redundant-PUT
930 elimination of previous stores to this field work better. */
931 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
932 }
933
934
935 /* Set the OP and DEP1 fields only, and write zero to DEP2. */
936
937 static
setFlags_DEP1(IROp op8,IRTemp dep1,IRType ty)938 void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
939 {
940 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
941
942 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
943
944 switch (op8) {
945 case Iop_Or8:
946 case Iop_And8:
947 case Iop_Xor8: ccOp += X86G_CC_OP_LOGICB; break;
948 default: ppIROp(op8);
949 vpanic("setFlags_DEP1(x86)");
950 }
951 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) );
952 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
953 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
954 /* Set NDEP even though it isn't used. This makes redundant-PUT
955 elimination of previous stores to this field work better. */
956 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
957 }
958
959
960 /* For shift operations, we put in the result and the undershifted
961 result. Except if the shift amount is zero, the thunk is left
962 unchanged. */
963
setFlags_DEP1_DEP2_shift(IROp op32,IRTemp res,IRTemp resUS,IRType ty,IRTemp guard)964 static void setFlags_DEP1_DEP2_shift ( IROp op32,
965 IRTemp res,
966 IRTemp resUS,
967 IRType ty,
968 IRTemp guard )
969 {
970 Int ccOp = ty==Ity_I8 ? 2 : (ty==Ity_I16 ? 1 : 0);
971
972 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
973 vassert(guard);
974
975 /* Both kinds of right shifts are handled by the same thunk
976 operation. */
977 switch (op32) {
978 case Iop_Shr32:
979 case Iop_Sar32: ccOp = X86G_CC_OP_SHRL - ccOp; break;
980 case Iop_Shl32: ccOp = X86G_CC_OP_SHLL - ccOp; break;
981 default: ppIROp(op32);
982 vpanic("setFlags_DEP1_DEP2_shift(x86)");
983 }
984
985 /* guard :: Ity_I8. We need to convert it to I1. */
986 IRTemp guardB = newTemp(Ity_I1);
987 assign( guardB, binop(Iop_CmpNE8, mkexpr(guard), mkU8(0)) );
988
989 /* DEP1 contains the result, DEP2 contains the undershifted value. */
990 stmt( IRStmt_Put( OFFB_CC_OP,
991 IRExpr_ITE( mkexpr(guardB),
992 mkU32(ccOp),
993 IRExpr_Get(OFFB_CC_OP,Ity_I32) ) ));
994 stmt( IRStmt_Put( OFFB_CC_DEP1,
995 IRExpr_ITE( mkexpr(guardB),
996 widenUto32(mkexpr(res)),
997 IRExpr_Get(OFFB_CC_DEP1,Ity_I32) ) ));
998 stmt( IRStmt_Put( OFFB_CC_DEP2,
999 IRExpr_ITE( mkexpr(guardB),
1000 widenUto32(mkexpr(resUS)),
1001 IRExpr_Get(OFFB_CC_DEP2,Ity_I32) ) ));
1002 /* Set NDEP even though it isn't used. This makes redundant-PUT
1003 elimination of previous stores to this field work better. */
1004 stmt( IRStmt_Put( OFFB_CC_NDEP,
1005 IRExpr_ITE( mkexpr(guardB),
1006 mkU32(0),
1007 IRExpr_Get(OFFB_CC_NDEP,Ity_I32) ) ));
1008 }
1009
1010
1011 /* For the inc/dec case, we store in DEP1 the result value and in NDEP
1012 the former value of the carry flag, which unfortunately we have to
1013 compute. */
1014
setFlags_INC_DEC(Bool inc,IRTemp res,IRType ty)1015 static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
1016 {
1017 Int ccOp = inc ? X86G_CC_OP_INCB : X86G_CC_OP_DECB;
1018
1019 ccOp += ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
1020 vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
1021
1022 /* This has to come first, because calculating the C flag
1023 may require reading all four thunk fields. */
1024 stmt( IRStmt_Put( OFFB_CC_NDEP, mk_x86g_calculate_eflags_c()) );
1025 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) );
1026 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(res))) );
1027 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
1028 }
1029
1030
1031 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
1032 two arguments. */
1033
1034 static
setFlags_MUL(IRType ty,IRTemp arg1,IRTemp arg2,UInt base_op)1035 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, UInt base_op )
1036 {
1037 switch (ty) {
1038 case Ity_I8:
1039 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+0) ) );
1040 break;
1041 case Ity_I16:
1042 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+1) ) );
1043 break;
1044 case Ity_I32:
1045 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+2) ) );
1046 break;
1047 default:
1048 vpanic("setFlags_MUL(x86)");
1049 }
1050 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(arg1)) ));
1051 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(arg2)) ));
1052 /* Set NDEP even though it isn't used. This makes redundant-PUT
1053 elimination of previous stores to this field work better. */
1054 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
1055 }
1056
1057
1058 /* -------------- Condition codes. -------------- */
1059
1060 /* Condition codes, using the Intel encoding. */
1061
name_X86Condcode(X86Condcode cond)1062 static const HChar* name_X86Condcode ( X86Condcode cond )
1063 {
1064 switch (cond) {
1065 case X86CondO: return "o";
1066 case X86CondNO: return "no";
1067 case X86CondB: return "b";
1068 case X86CondNB: return "nb";
1069 case X86CondZ: return "z";
1070 case X86CondNZ: return "nz";
1071 case X86CondBE: return "be";
1072 case X86CondNBE: return "nbe";
1073 case X86CondS: return "s";
1074 case X86CondNS: return "ns";
1075 case X86CondP: return "p";
1076 case X86CondNP: return "np";
1077 case X86CondL: return "l";
1078 case X86CondNL: return "nl";
1079 case X86CondLE: return "le";
1080 case X86CondNLE: return "nle";
1081 case X86CondAlways: return "ALWAYS";
1082 default: vpanic("name_X86Condcode");
1083 }
1084 }
1085
1086 static
positiveIse_X86Condcode(X86Condcode cond,Bool * needInvert)1087 X86Condcode positiveIse_X86Condcode ( X86Condcode cond,
1088 Bool* needInvert )
1089 {
1090 vassert(cond >= X86CondO && cond <= X86CondNLE);
1091 if (cond & 1) {
1092 *needInvert = True;
1093 return cond-1;
1094 } else {
1095 *needInvert = False;
1096 return cond;
1097 }
1098 }
1099
1100
1101 /* -------------- Helpers for ADD/SUB with carry. -------------- */
1102
1103 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
1104 appropriately.
1105
1106 Optionally, generate a store for the 'tres' value. This can either
1107 be a normal store, or it can be a cas-with-possible-failure style
1108 store:
1109
1110 if taddr is IRTemp_INVALID, then no store is generated.
1111
1112 if taddr is not IRTemp_INVALID, then a store (using taddr as
1113 the address) is generated:
1114
1115 if texpVal is IRTemp_INVALID then a normal store is
1116 generated, and restart_point must be zero (it is irrelevant).
1117
1118 if texpVal is not IRTemp_INVALID then a cas-style store is
1119 generated. texpVal is the expected value, restart_point
1120 is the restart point if the store fails, and texpVal must
1121 have the same type as tres.
1122 */
helper_ADC(Int sz,IRTemp tres,IRTemp ta1,IRTemp ta2,IRTemp taddr,IRTemp texpVal,Addr32 restart_point)1123 static void helper_ADC ( Int sz,
1124 IRTemp tres, IRTemp ta1, IRTemp ta2,
1125 /* info about optional store: */
1126 IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
1127 {
1128 UInt thunkOp;
1129 IRType ty = szToITy(sz);
1130 IRTemp oldc = newTemp(Ity_I32);
1131 IRTemp oldcn = newTemp(ty);
1132 IROp plus = mkSizedOp(ty, Iop_Add8);
1133 IROp xor = mkSizedOp(ty, Iop_Xor8);
1134
1135 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
1136 vassert(sz == 1 || sz == 2 || sz == 4);
1137 thunkOp = sz==4 ? X86G_CC_OP_ADCL
1138 : (sz==2 ? X86G_CC_OP_ADCW : X86G_CC_OP_ADCB);
1139
1140 /* oldc = old carry flag, 0 or 1 */
1141 assign( oldc, binop(Iop_And32,
1142 mk_x86g_calculate_eflags_c(),
1143 mkU32(1)) );
1144
1145 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
1146
1147 assign( tres, binop(plus,
1148 binop(plus,mkexpr(ta1),mkexpr(ta2)),
1149 mkexpr(oldcn)) );
1150
1151 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
1152 start of this function. */
1153 if (taddr != IRTemp_INVALID) {
1154 if (texpVal == IRTemp_INVALID) {
1155 vassert(restart_point == 0);
1156 storeLE( mkexpr(taddr), mkexpr(tres) );
1157 } else {
1158 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
1159 /* .. and hence 'texpVal' has the same type as 'tres'. */
1160 casLE( mkexpr(taddr),
1161 mkexpr(texpVal), mkexpr(tres), restart_point );
1162 }
1163 }
1164
1165 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(thunkOp) ) );
1166 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1)) ));
1167 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
1168 mkexpr(oldcn)) )) );
1169 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
1170 }
1171
1172
1173 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
1174 appropriately. As with helper_ADC, possibly generate a store of
1175 the result -- see comments on helper_ADC for details.
1176 */
helper_SBB(Int sz,IRTemp tres,IRTemp ta1,IRTemp ta2,IRTemp taddr,IRTemp texpVal,Addr32 restart_point)1177 static void helper_SBB ( Int sz,
1178 IRTemp tres, IRTemp ta1, IRTemp ta2,
1179 /* info about optional store: */
1180 IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
1181 {
1182 UInt thunkOp;
1183 IRType ty = szToITy(sz);
1184 IRTemp oldc = newTemp(Ity_I32);
1185 IRTemp oldcn = newTemp(ty);
1186 IROp minus = mkSizedOp(ty, Iop_Sub8);
1187 IROp xor = mkSizedOp(ty, Iop_Xor8);
1188
1189 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
1190 vassert(sz == 1 || sz == 2 || sz == 4);
1191 thunkOp = sz==4 ? X86G_CC_OP_SBBL
1192 : (sz==2 ? X86G_CC_OP_SBBW : X86G_CC_OP_SBBB);
1193
1194 /* oldc = old carry flag, 0 or 1 */
1195 assign( oldc, binop(Iop_And32,
1196 mk_x86g_calculate_eflags_c(),
1197 mkU32(1)) );
1198
1199 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
1200
1201 assign( tres, binop(minus,
1202 binop(minus,mkexpr(ta1),mkexpr(ta2)),
1203 mkexpr(oldcn)) );
1204
1205 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
1206 start of this function. */
1207 if (taddr != IRTemp_INVALID) {
1208 if (texpVal == IRTemp_INVALID) {
1209 vassert(restart_point == 0);
1210 storeLE( mkexpr(taddr), mkexpr(tres) );
1211 } else {
1212 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
1213 /* .. and hence 'texpVal' has the same type as 'tres'. */
1214 casLE( mkexpr(taddr),
1215 mkexpr(texpVal), mkexpr(tres), restart_point );
1216 }
1217 }
1218
1219 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(thunkOp) ) );
1220 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1) )) );
1221 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
1222 mkexpr(oldcn)) )) );
1223 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
1224 }
1225
1226
1227 /* -------------- Helpers for disassembly printing. -------------- */
1228
nameGrp1(Int opc_aux)1229 static const HChar* nameGrp1 ( Int opc_aux )
1230 {
1231 static const HChar* grp1_names[8]
1232 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
1233 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(x86)");
1234 return grp1_names[opc_aux];
1235 }
1236
nameGrp2(Int opc_aux)1237 static const HChar* nameGrp2 ( Int opc_aux )
1238 {
1239 static const HChar* grp2_names[8]
1240 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
1241 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(x86)");
1242 return grp2_names[opc_aux];
1243 }
1244
nameGrp4(Int opc_aux)1245 static const HChar* nameGrp4 ( Int opc_aux )
1246 {
1247 static const HChar* grp4_names[8]
1248 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
1249 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(x86)");
1250 return grp4_names[opc_aux];
1251 }
1252
nameGrp5(Int opc_aux)1253 static const HChar* nameGrp5 ( Int opc_aux )
1254 {
1255 static const HChar* grp5_names[8]
1256 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
1257 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(x86)");
1258 return grp5_names[opc_aux];
1259 }
1260
nameGrp8(Int opc_aux)1261 static const HChar* nameGrp8 ( Int opc_aux )
1262 {
1263 static const HChar* grp8_names[8]
1264 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
1265 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(x86)");
1266 return grp8_names[opc_aux];
1267 }
1268
nameIReg(Int size,Int reg)1269 static const HChar* nameIReg ( Int size, Int reg )
1270 {
1271 static const HChar* ireg32_names[8]
1272 = { "%eax", "%ecx", "%edx", "%ebx",
1273 "%esp", "%ebp", "%esi", "%edi" };
1274 static const HChar* ireg16_names[8]
1275 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" };
1276 static const HChar* ireg8_names[8]
1277 = { "%al", "%cl", "%dl", "%bl",
1278 "%ah{sp}", "%ch{bp}", "%dh{si}", "%bh{di}" };
1279 if (reg < 0 || reg > 7) goto bad;
1280 switch (size) {
1281 case 4: return ireg32_names[reg];
1282 case 2: return ireg16_names[reg];
1283 case 1: return ireg8_names[reg];
1284 }
1285 bad:
1286 vpanic("nameIReg(X86)");
1287 return NULL; /*notreached*/
1288 }
1289
nameSReg(UInt sreg)1290 static const HChar* nameSReg ( UInt sreg )
1291 {
1292 switch (sreg) {
1293 case R_ES: return "%es";
1294 case R_CS: return "%cs";
1295 case R_SS: return "%ss";
1296 case R_DS: return "%ds";
1297 case R_FS: return "%fs";
1298 case R_GS: return "%gs";
1299 default: vpanic("nameSReg(x86)");
1300 }
1301 }
1302
nameMMXReg(Int mmxreg)1303 static const HChar* nameMMXReg ( Int mmxreg )
1304 {
1305 static const HChar* mmx_names[8]
1306 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
1307 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(x86,guest)");
1308 return mmx_names[mmxreg];
1309 }
1310
nameXMMReg(Int xmmreg)1311 static const HChar* nameXMMReg ( Int xmmreg )
1312 {
1313 static const HChar* xmm_names[8]
1314 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
1315 "%xmm4", "%xmm5", "%xmm6", "%xmm7" };
1316 if (xmmreg < 0 || xmmreg > 7) vpanic("name_of_xmm_reg");
1317 return xmm_names[xmmreg];
1318 }
1319
nameMMXGran(Int gran)1320 static const HChar* nameMMXGran ( Int gran )
1321 {
1322 switch (gran) {
1323 case 0: return "b";
1324 case 1: return "w";
1325 case 2: return "d";
1326 case 3: return "q";
1327 default: vpanic("nameMMXGran(x86,guest)");
1328 }
1329 }
1330
nameISize(Int size)1331 static HChar nameISize ( Int size )
1332 {
1333 switch (size) {
1334 case 4: return 'l';
1335 case 2: return 'w';
1336 case 1: return 'b';
1337 default: vpanic("nameISize(x86)");
1338 }
1339 }
1340
1341
1342 /*------------------------------------------------------------*/
1343 /*--- JMP helpers ---*/
1344 /*------------------------------------------------------------*/
1345
/* End the block with a jump of the given kind to the literal address
   d32: mark *dres as stopping here and write d32 to the guest EIP.
   *dres must not yet have had any of these decisions recorded. */
static void jmp_lit( /*MOD*/DisResult* dres,
                     IRJumpKind kind, Addr32 d32 )
{
   IRExpr* target;

   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);

   dres->jk_StopHere = kind;
   dres->whatNext    = Dis_StopHere;

   target = mkU32(d32);
   stmt( IRStmt_Put( OFFB_EIP, target ) );
}
1357
/* End the block with a jump of the given kind to the address held in
   temp t: mark *dres as stopping here and write t to the guest EIP.
   *dres must not yet have had any of these decisions recorded. */
static void jmp_treg( /*MOD*/DisResult* dres,
                      IRJumpKind kind, IRTemp t )
{
   IRExpr* target;

   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);

   dres->jk_StopHere = kind;
   dres->whatNext    = Dis_StopHere;

   target = mkexpr(t);
   stmt( IRStmt_Put( OFFB_EIP, target ) );
}
1369
1370 static
jcc_01(DisResult * dres,X86Condcode cond,Addr32 d32_false,Addr32 d32_true)1371 void jcc_01( /*MOD*/DisResult* dres,
1372 X86Condcode cond, Addr32 d32_false, Addr32 d32_true )
1373 {
1374 Bool invert;
1375 X86Condcode condPos;
1376 vassert(dres->whatNext == Dis_Continue);
1377 vassert(dres->len == 0);
1378 vassert(dres->continueAt == 0);
1379 vassert(dres->jk_StopHere == Ijk_INVALID);
1380 dres->whatNext = Dis_StopHere;
1381 dres->jk_StopHere = Ijk_Boring;
1382 condPos = positiveIse_X86Condcode ( cond, &invert );
1383 if (invert) {
1384 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
1385 Ijk_Boring,
1386 IRConst_U32(d32_false),
1387 OFFB_EIP ) );
1388 stmt( IRStmt_Put( OFFB_EIP, mkU32(d32_true) ) );
1389 } else {
1390 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
1391 Ijk_Boring,
1392 IRConst_U32(d32_true),
1393 OFFB_EIP ) );
1394 stmt( IRStmt_Put( OFFB_EIP, mkU32(d32_false) ) );
1395 }
1396 }
1397
1398
1399 /*------------------------------------------------------------*/
1400 /*--- Disassembling addressing modes ---*/
1401 /*------------------------------------------------------------*/
1402
1403 static
sorbTxt(UChar sorb)1404 const HChar* sorbTxt ( UChar sorb )
1405 {
1406 switch (sorb) {
1407 case 0: return ""; /* no override */
1408 case 0x3E: return "%ds";
1409 case 0x26: return "%es:";
1410 case 0x64: return "%fs:";
1411 case 0x65: return "%gs:";
1412 case 0x36: return "%ss:";
1413 default: vpanic("sorbTxt(x86,guest)");
1414 }
1415 }
1416
1417
1418 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
1419 linear address by adding any required segment override as indicated
1420 by sorb. */
1421 static
handleSegOverride(UChar sorb,IRExpr * virtual)1422 IRExpr* handleSegOverride ( UChar sorb, IRExpr* virtual )
1423 {
1424 Int sreg;
1425 IRType hWordTy;
1426 IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
1427
1428 if (sorb == 0)
1429 /* the common case - no override */
1430 return virtual;
1431
1432 switch (sorb) {
1433 case 0x3E: sreg = R_DS; break;
1434 case 0x26: sreg = R_ES; break;
1435 case 0x64: sreg = R_FS; break;
1436 case 0x65: sreg = R_GS; break;
1437 case 0x36: sreg = R_SS; break;
1438 default: vpanic("handleSegOverride(x86,guest)");
1439 }
1440
1441 hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
1442
1443 seg_selector = newTemp(Ity_I32);
1444 ldt_ptr = newTemp(hWordTy);
1445 gdt_ptr = newTemp(hWordTy);
1446 r64 = newTemp(Ity_I64);
1447
1448 assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
1449 assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
1450 assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
1451
1452 /*
1453 Call this to do the translation and limit checks:
1454 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
1455 UInt seg_selector, UInt virtual_addr )
1456 */
1457 assign(
1458 r64,
1459 mkIRExprCCall(
1460 Ity_I64,
1461 0/*regparms*/,
1462 "x86g_use_seg_selector",
1463 &x86g_use_seg_selector,
1464 mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
1465 mkexpr(seg_selector), virtual)
1466 )
1467 );
1468
1469 /* If the high 32 of the result are non-zero, there was a
1470 failure in address translation. In which case, make a
1471 quick exit.
1472 */
1473 stmt(
1474 IRStmt_Exit(
1475 binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
1476 Ijk_MapFail,
1477 IRConst_U32( guest_EIP_curr_instr ),
1478 OFFB_EIP
1479 )
1480 );
1481
1482 /* otherwise, here's the translated result. */
1483 return unop(Iop_64to32, mkexpr(r64));
1484 }
1485
1486
1487 /* Generate IR to calculate an address indicated by a ModRM and
1488 following SIB bytes. The expression, and the number of bytes in
1489 the address mode, are returned. Note that this fn should not be
1490 called if the R/M part of the address denotes a register instead of
1491 memory. If print_codegen is true, text of the addressing mode is
1492 placed in buf.
1493
1494 The computed address is stored in a new tempreg, and the
1495 identity of the tempreg is returned. */
1496
disAMode_copy2tmp(IRExpr * addr32)1497 static IRTemp disAMode_copy2tmp ( IRExpr* addr32 )
1498 {
1499 IRTemp tmp = newTemp(Ity_I32);
1500 assign( tmp, addr32 );
1501 return tmp;
1502 }
1503
/* Decode the memory addressing mode whose ModRM byte is at 'delta'.
   Returns a new temp holding the computed address, after any segment
   override indicated by sorb has been applied.  *len is set to the
   number of instruction bytes the amode occupies (ModRM byte plus any
   SIB byte and displacement).  buf is first set to the empty string
   and then handed to DIS together with the text of the amode.
   Panics if the ModRM byte denotes a register rather than memory. */
static
IRTemp disAMode ( Int* len, UChar sorb, Int delta, HChar* buf )
{
   UChar mod_reg_rm = getIByte(delta);
   delta++;

   buf[0] = (UChar)0;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                      /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                            /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                      /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* (%eax) .. (%edi), not including (%esp) or (%ebp).
         --> GET %reg, t
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         { UChar rm = mod_reg_rm;
           DIS(buf, "%s(%s)", sorbTxt(sorb), nameIReg(4,rm));
           *len = 1;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb, getIReg(4,rm)));
         }

      /* d8(%eax) ... d8(%edi), not including d8(%esp)
         --> GET %reg, t ; ADDL d8, t
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         { UChar rm = toUChar(mod_reg_rm & 7);
           /* the 8-bit displacement, as delivered by getSDisp8; it is
              printed as a signed value */
           UInt  d  = getSDisp8(delta);
           DIS(buf, "%s%d(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm));
           *len = 2;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb,
                     binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
         }

      /* d32(%eax) ... d32(%edi), not including d32(%esp)
         --> GET %reg, t ; ADDL d32, t
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         { UChar rm = toUChar(mod_reg_rm & 7);
           UInt  d  = getUDisp32(delta);
           DIS(buf, "%s0x%x(%s)", sorbTxt(sorb), d, nameIReg(4,rm));
           *len = 5;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb,
                     binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
         }

      /* a register, %eax .. %edi.  This shouldn't happen. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         vpanic("disAMode(x86): not an addr!");

      /* a 32-bit literal address
         --> MOV d32, tmp
      */
      case 0x05:
         { UInt d = getUDisp32(delta);
           *len = 5;
           DIS(buf, "%s(0x%x)", sorbTxt(sorb), d);
           return disAMode_copy2tmp(
                     handleSegOverride(sorb, mkU32(d)));
         }

      case 0x04: {
         /* SIB, with no displacement.  Special cases:
            -- %esp cannot act as an index value.
               If index_r indicates %esp, zero is used for the index.
            -- when mod is zero and base indicates EBP, base is instead
               a 32-bit literal.
            It's all madness, I tell you.  Extract %index, %base and
            scale from the SIB byte.  The value denoted is then:
               | %index == %ESP && %base == %EBP
               = d32 following SIB byte
               | %index == %ESP && %base != %EBP
               = %base
               | %index != %ESP && %base == %EBP
               = d32 following SIB byte + (%index << scale)
               | %index != %ESP && %base != %EBP
               = %base + (%index << scale)

            What happens to the souls of CPU architects who dream up such
            horrendous schemes, do you suppose?
         */
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         /* step over the SIB byte, so delta now addresses any d32 */
         delta++;

         if (index_r != R_ESP && base_r != R_EBP) {
            DIS(buf, "%s(%s,%s,%d)", sorbTxt(sorb),
                      nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 2;
            return
               disAMode_copy2tmp(
               handleSegOverride(sorb,
                  binop(Iop_Add32,
                        getIReg(4,base_r),
                        binop(Iop_Shl32, getIReg(4,index_r),
                              mkU8(scale)))));
         }

         if (index_r != R_ESP && base_r == R_EBP) {
            UInt d = getUDisp32(delta);
            DIS(buf, "%s0x%x(,%s,%d)", sorbTxt(sorb), d,
                      nameIReg(4,index_r), 1<<scale);
            *len = 6;
            return
               disAMode_copy2tmp(
               handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Shl32, getIReg(4,index_r), mkU8(scale)),
                        mkU32(d))));
         }

         if (index_r == R_ESP && base_r != R_EBP) {
            DIS(buf, "%s(%s,,)", sorbTxt(sorb), nameIReg(4,base_r));
            *len = 2;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb, getIReg(4,base_r)));
         }

         if (index_r == R_ESP && base_r == R_EBP) {
            UInt d = getUDisp32(delta);
            DIS(buf, "%s0x%x(,,)", sorbTxt(sorb), d);
            *len = 6;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb, mkU32(d)));
         }
         /* The four tests above cover every index/base combination. */
         /*NOTREACHED*/
         vassert(0);
      }

      /* SIB, with 8-bit displacement.  Special cases:
         -- %esp cannot act as an index value.
            If index_r indicates %esp, zero is used for the index.
         Denoted value is:
            | %index == %ESP
            = d8 + %base
            | %index != %ESP
            = d8 + %base + (%index << scale)
      */
      case 0x0C: {
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         /* the displacement byte follows the SIB byte */
         UInt  d       = getSDisp8(delta+1);

         if (index_r == R_ESP) {
            DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
                                   (Int)d, nameIReg(4,base_r));
            *len = 3;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb,
                      binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
         } else {
            DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
                     nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 3;
            return
                disAMode_copy2tmp(
                handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Add32,
                              getIReg(4,base_r),
                              binop(Iop_Shl32,
                                    getIReg(4,index_r), mkU8(scale))),
                        mkU32(d))));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      /* SIB, with 32-bit displacement.  Special cases:
         -- %esp cannot act as an index value.
            If index_r indicates %esp, zero is used for the index.
         Denoted value is:
            | %index == %ESP
            = d32 + %base
            | %index != %ESP
            = d32 + %base + (%index << scale)
      */
      case 0x14: {
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         /* the 32-bit displacement follows the SIB byte */
         UInt  d       = getUDisp32(delta+1);

         if (index_r == R_ESP) {
            DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
                                   (Int)d, nameIReg(4,base_r));
            *len = 6;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb,
                      binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
         } else {
            DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
                     nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 6;
            return
                disAMode_copy2tmp(
                handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Add32,
                              getIReg(4,base_r),
                              binop(Iop_Shl32,
                                    getIReg(4,index_r), mkU8(scale))),
                        mkU32(d))));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      default:
         vpanic("disAMode(x86)");
         return 0; /*notreached*/
   }
}
1734
1735
1736 /* Figure out the number of (insn-stream) bytes constituting the amode
1737 beginning at delta. Is useful for getting hold of literals beyond
1738 the end of the amode before it has been disassembled. */
1739
lengthAMode(Int delta)1740 static UInt lengthAMode ( Int delta )
1741 {
1742 UChar mod_reg_rm = getIByte(delta); delta++;
1743
1744 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
1745 jump table seems a bit excessive.
1746 */
1747 mod_reg_rm &= 0xC7; /* is now XX000YYY */
1748 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
1749 /* is now XX0XXYYY */
1750 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
1751 switch (mod_reg_rm) {
1752
1753 /* (%eax) .. (%edi), not including (%esp) or (%ebp). */
1754 case 0x00: case 0x01: case 0x02: case 0x03:
1755 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
1756 return 1;
1757
1758 /* d8(%eax) ... d8(%edi), not including d8(%esp). */
1759 case 0x08: case 0x09: case 0x0A: case 0x0B:
1760 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
1761 return 2;
1762
1763 /* d32(%eax) ... d32(%edi), not including d32(%esp). */
1764 case 0x10: case 0x11: case 0x12: case 0x13:
1765 /* ! 14 */ case 0x15: case 0x16: case 0x17:
1766 return 5;
1767
1768 /* a register, %eax .. %edi. (Not an addr, but still handled.) */
1769 case 0x18: case 0x19: case 0x1A: case 0x1B:
1770 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
1771 return 1;
1772
1773 /* a 32-bit literal address. */
1774 case 0x05: return 5;
1775
1776 /* SIB, no displacement. */
1777 case 0x04: {
1778 UChar sib = getIByte(delta);
1779 UChar base_r = toUChar(sib & 7);
1780 if (base_r == R_EBP) return 6; else return 2;
1781 }
1782 /* SIB, with 8-bit displacement. */
1783 case 0x0C: return 3;
1784
1785 /* SIB, with 32-bit displacement. */
1786 case 0x14: return 6;
1787
1788 default:
1789 vpanic("lengthAMode");
1790 return 0; /*notreached*/
1791 }
1792 }
1793
1794 /*------------------------------------------------------------*/
1795 /*--- Disassembling common idioms ---*/
1796 /*------------------------------------------------------------*/
1797
1798 /* Handle binary integer instructions of the form
1799 op E, G meaning
1800 op reg-or-mem, reg
1801 Is passed the a ptr to the modRM byte, the actual operation, and the
1802 data size. Returns the address advanced completely over this
1803 instruction.
1804
1805 E(src) is reg-or-mem
1806 G(dst) is reg.
1807
1808 If E is reg, --> GET %G, tmp
1809 OP %E, tmp
1810 PUT tmp, %G
1811
1812 If E is mem and OP is not reversible,
1813 --> (getAddr E) -> tmpa
1814 LD (tmpa), tmpa
1815 GET %G, tmp2
1816 OP tmpa, tmp2
1817 PUT tmp2, %G
1818
1819 If E is mem and OP is reversible
1820 --> (getAddr E) -> tmpa
1821 LD (tmpa), tmpa
1822 OP %G, tmpa
1823 PUT tmpa, %G
1824 */
1825 static
dis_op2_E_G(UChar sorb,Bool addSubCarry,IROp op8,Bool keep,Int size,Int delta0,const HChar * t_x86opc)1826 UInt dis_op2_E_G ( UChar sorb,
1827 Bool addSubCarry,
1828 IROp op8,
1829 Bool keep,
1830 Int size,
1831 Int delta0,
1832 const HChar* t_x86opc )
1833 {
1834 HChar dis_buf[50];
1835 Int len;
1836 IRType ty = szToITy(size);
1837 IRTemp dst1 = newTemp(ty);
1838 IRTemp src = newTemp(ty);
1839 IRTemp dst0 = newTemp(ty);
1840 UChar rm = getUChar(delta0);
1841 IRTemp addr = IRTemp_INVALID;
1842
1843 /* addSubCarry == True indicates the intended operation is
1844 add-with-carry or subtract-with-borrow. */
1845 if (addSubCarry) {
1846 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
1847 vassert(keep);
1848 }
1849
1850 if (epartIsReg(rm)) {
1851 /* Specially handle XOR reg,reg, because that doesn't really
1852 depend on reg, and doing the obvious thing potentially
1853 generates a spurious value check failure due to the bogus
1854 dependency. Ditto SBB reg,reg. */
1855 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
1856 && gregOfRM(rm) == eregOfRM(rm)) {
1857 putIReg(size, gregOfRM(rm), mkU(ty,0));
1858 }
1859 assign( dst0, getIReg(size,gregOfRM(rm)) );
1860 assign( src, getIReg(size,eregOfRM(rm)) );
1861
1862 if (addSubCarry && op8 == Iop_Add8) {
1863 helper_ADC( size, dst1, dst0, src,
1864 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1865 putIReg(size, gregOfRM(rm), mkexpr(dst1));
1866 } else
1867 if (addSubCarry && op8 == Iop_Sub8) {
1868 helper_SBB( size, dst1, dst0, src,
1869 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1870 putIReg(size, gregOfRM(rm), mkexpr(dst1));
1871 } else {
1872 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
1873 if (isAddSub(op8))
1874 setFlags_DEP1_DEP2(op8, dst0, src, ty);
1875 else
1876 setFlags_DEP1(op8, dst1, ty);
1877 if (keep)
1878 putIReg(size, gregOfRM(rm), mkexpr(dst1));
1879 }
1880
1881 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
1882 nameIReg(size,eregOfRM(rm)),
1883 nameIReg(size,gregOfRM(rm)));
1884 return 1+delta0;
1885 } else {
1886 /* E refers to memory */
1887 addr = disAMode ( &len, sorb, delta0, dis_buf);
1888 assign( dst0, getIReg(size,gregOfRM(rm)) );
1889 assign( src, loadLE(szToITy(size), mkexpr(addr)) );
1890
1891 if (addSubCarry && op8 == Iop_Add8) {
1892 helper_ADC( size, dst1, dst0, src,
1893 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1894 putIReg(size, gregOfRM(rm), mkexpr(dst1));
1895 } else
1896 if (addSubCarry && op8 == Iop_Sub8) {
1897 helper_SBB( size, dst1, dst0, src,
1898 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1899 putIReg(size, gregOfRM(rm), mkexpr(dst1));
1900 } else {
1901 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
1902 if (isAddSub(op8))
1903 setFlags_DEP1_DEP2(op8, dst0, src, ty);
1904 else
1905 setFlags_DEP1(op8, dst1, ty);
1906 if (keep)
1907 putIReg(size, gregOfRM(rm), mkexpr(dst1));
1908 }
1909
1910 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
1911 dis_buf,nameIReg(size,gregOfRM(rm)));
1912 return len+delta0;
1913 }
1914 }
1915
1916
1917
1918 /* Handle binary integer instructions of the form
1919 op G, E meaning
1920 op reg, reg-or-mem
1921 Is passed the a ptr to the modRM byte, the actual operation, and the
1922 data size. Returns the address advanced completely over this
1923 instruction.
1924
1925 G(src) is reg.
1926 E(dst) is reg-or-mem
1927
1928 If E is reg, --> GET %E, tmp
1929 OP %G, tmp
1930 PUT tmp, %E
1931
1932 If E is mem, --> (getAddr E) -> tmpa
1933 LD (tmpa), tmpv
1934 OP %G, tmpv
1935 ST tmpv, (tmpa)
1936 */
1937 static
dis_op2_G_E(UChar sorb,Bool locked,Bool addSubCarry,IROp op8,Bool keep,Int size,Int delta0,const HChar * t_x86opc)1938 UInt dis_op2_G_E ( UChar sorb,
1939 Bool locked,
1940 Bool addSubCarry,
1941 IROp op8,
1942 Bool keep,
1943 Int size,
1944 Int delta0,
1945 const HChar* t_x86opc )
1946 {
1947 HChar dis_buf[50];
1948 Int len;
1949 IRType ty = szToITy(size);
1950 IRTemp dst1 = newTemp(ty);
1951 IRTemp src = newTemp(ty);
1952 IRTemp dst0 = newTemp(ty);
1953 UChar rm = getIByte(delta0);
1954 IRTemp addr = IRTemp_INVALID;
1955
1956 /* addSubCarry == True indicates the intended operation is
1957 add-with-carry or subtract-with-borrow. */
1958 if (addSubCarry) {
1959 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
1960 vassert(keep);
1961 }
1962
1963 if (epartIsReg(rm)) {
1964 /* Specially handle XOR reg,reg, because that doesn't really
1965 depend on reg, and doing the obvious thing potentially
1966 generates a spurious value check failure due to the bogus
1967 dependency. Ditto SBB reg,reg.*/
1968 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
1969 && gregOfRM(rm) == eregOfRM(rm)) {
1970 putIReg(size, eregOfRM(rm), mkU(ty,0));
1971 }
1972 assign(dst0, getIReg(size,eregOfRM(rm)));
1973 assign(src, getIReg(size,gregOfRM(rm)));
1974
1975 if (addSubCarry && op8 == Iop_Add8) {
1976 helper_ADC( size, dst1, dst0, src,
1977 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1978 putIReg(size, eregOfRM(rm), mkexpr(dst1));
1979 } else
1980 if (addSubCarry && op8 == Iop_Sub8) {
1981 helper_SBB( size, dst1, dst0, src,
1982 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
1983 putIReg(size, eregOfRM(rm), mkexpr(dst1));
1984 } else {
1985 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
1986 if (isAddSub(op8))
1987 setFlags_DEP1_DEP2(op8, dst0, src, ty);
1988 else
1989 setFlags_DEP1(op8, dst1, ty);
1990 if (keep)
1991 putIReg(size, eregOfRM(rm), mkexpr(dst1));
1992 }
1993
1994 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
1995 nameIReg(size,gregOfRM(rm)),
1996 nameIReg(size,eregOfRM(rm)));
1997 return 1+delta0;
1998 }
1999
2000 /* E refers to memory */
2001 {
2002 addr = disAMode ( &len, sorb, delta0, dis_buf);
2003 assign(dst0, loadLE(ty,mkexpr(addr)));
2004 assign(src, getIReg(size,gregOfRM(rm)));
2005
2006 if (addSubCarry && op8 == Iop_Add8) {
2007 if (locked) {
2008 /* cas-style store */
2009 helper_ADC( size, dst1, dst0, src,
2010 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
2011 } else {
2012 /* normal store */
2013 helper_ADC( size, dst1, dst0, src,
2014 /*store*/addr, IRTemp_INVALID, 0 );
2015 }
2016 } else
2017 if (addSubCarry && op8 == Iop_Sub8) {
2018 if (locked) {
2019 /* cas-style store */
2020 helper_SBB( size, dst1, dst0, src,
2021 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
2022 } else {
2023 /* normal store */
2024 helper_SBB( size, dst1, dst0, src,
2025 /*store*/addr, IRTemp_INVALID, 0 );
2026 }
2027 } else {
2028 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
2029 if (keep) {
2030 if (locked) {
2031 if (0) vex_printf("locked case\n" );
2032 casLE( mkexpr(addr),
2033 mkexpr(dst0)/*expval*/,
2034 mkexpr(dst1)/*newval*/, guest_EIP_curr_instr );
2035 } else {
2036 if (0) vex_printf("nonlocked case\n");
2037 storeLE(mkexpr(addr), mkexpr(dst1));
2038 }
2039 }
2040 if (isAddSub(op8))
2041 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2042 else
2043 setFlags_DEP1(op8, dst1, ty);
2044 }
2045
2046 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
2047 nameIReg(size,gregOfRM(rm)), dis_buf);
2048 return len+delta0;
2049 }
2050 }
2051
2052
2053 /* Handle move instructions of the form
2054 mov E, G meaning
2055 mov reg-or-mem, reg
2056 Is passed the a ptr to the modRM byte, and the data size. Returns
2057 the address advanced completely over this instruction.
2058
2059 E(src) is reg-or-mem
2060 G(dst) is reg.
2061
2062 If E is reg, --> GET %E, tmpv
2063 PUT tmpv, %G
2064
2065 If E is mem --> (getAddr E) -> tmpa
2066 LD (tmpa), tmpb
2067 PUT tmpb, %G
2068 */
2069 static
dis_mov_E_G(UChar sorb,Int size,Int delta0)2070 UInt dis_mov_E_G ( UChar sorb,
2071 Int size,
2072 Int delta0 )
2073 {
2074 Int len;
2075 UChar rm = getIByte(delta0);
2076 HChar dis_buf[50];
2077
2078 if (epartIsReg(rm)) {
2079 putIReg(size, gregOfRM(rm), getIReg(size, eregOfRM(rm)));
2080 DIP("mov%c %s,%s\n", nameISize(size),
2081 nameIReg(size,eregOfRM(rm)),
2082 nameIReg(size,gregOfRM(rm)));
2083 return 1+delta0;
2084 }
2085
2086 /* E refers to memory */
2087 {
2088 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
2089 putIReg(size, gregOfRM(rm), loadLE(szToITy(size), mkexpr(addr)));
2090 DIP("mov%c %s,%s\n", nameISize(size),
2091 dis_buf,nameIReg(size,gregOfRM(rm)));
2092 return delta0+len;
2093 }
2094 }
2095
2096
2097 /* Handle move instructions of the form
2098 mov G, E meaning
2099 mov reg, reg-or-mem
2100 Is passed the a ptr to the modRM byte, and the data size. Returns
2101 the address advanced completely over this instruction.
2102
2103 G(src) is reg.
2104 E(dst) is reg-or-mem
2105
2106 If E is reg, --> GET %G, tmp
2107 PUT tmp, %E
2108
2109 If E is mem, --> (getAddr E) -> tmpa
2110 GET %G, tmpv
2111 ST tmpv, (tmpa)
2112 */
2113 static
dis_mov_G_E(UChar sorb,Int size,Int delta0)2114 UInt dis_mov_G_E ( UChar sorb,
2115 Int size,
2116 Int delta0 )
2117 {
2118 Int len;
2119 UChar rm = getIByte(delta0);
2120 HChar dis_buf[50];
2121
2122 if (epartIsReg(rm)) {
2123 putIReg(size, eregOfRM(rm), getIReg(size, gregOfRM(rm)));
2124 DIP("mov%c %s,%s\n", nameISize(size),
2125 nameIReg(size,gregOfRM(rm)),
2126 nameIReg(size,eregOfRM(rm)));
2127 return 1+delta0;
2128 }
2129
2130 /* E refers to memory */
2131 {
2132 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf);
2133 storeLE( mkexpr(addr), getIReg(size, gregOfRM(rm)) );
2134 DIP("mov%c %s,%s\n", nameISize(size),
2135 nameIReg(size,gregOfRM(rm)), dis_buf);
2136 return len+delta0;
2137 }
2138 }
2139
2140
2141 /* op $immediate, AL/AX/EAX. */
2142 static
dis_op_imm_A(Int size,Bool carrying,IROp op8,Bool keep,Int delta,const HChar * t_x86opc)2143 UInt dis_op_imm_A ( Int size,
2144 Bool carrying,
2145 IROp op8,
2146 Bool keep,
2147 Int delta,
2148 const HChar* t_x86opc )
2149 {
2150 IRType ty = szToITy(size);
2151 IRTemp dst0 = newTemp(ty);
2152 IRTemp src = newTemp(ty);
2153 IRTemp dst1 = newTemp(ty);
2154 UInt lit = getUDisp(size,delta);
2155 assign(dst0, getIReg(size,R_EAX));
2156 assign(src, mkU(ty,lit));
2157
2158 if (isAddSub(op8) && !carrying) {
2159 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2160 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2161 }
2162 else
2163 if (isLogic(op8)) {
2164 vassert(!carrying);
2165 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2166 setFlags_DEP1(op8, dst1, ty);
2167 }
2168 else
2169 if (op8 == Iop_Add8 && carrying) {
2170 helper_ADC( size, dst1, dst0, src,
2171 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2172 }
2173 else
2174 if (op8 == Iop_Sub8 && carrying) {
2175 helper_SBB( size, dst1, dst0, src,
2176 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2177 }
2178 else
2179 vpanic("dis_op_imm_A(x86,guest)");
2180
2181 if (keep)
2182 putIReg(size, R_EAX, mkexpr(dst1));
2183
2184 DIP("%s%c $0x%x, %s\n", t_x86opc, nameISize(size),
2185 lit, nameIReg(size,R_EAX));
2186 return delta+size;
2187 }
2188
2189
2190 /* Sign- and Zero-extending moves. */
2191 static
dis_movx_E_G(UChar sorb,Int delta,Int szs,Int szd,Bool sign_extend)2192 UInt dis_movx_E_G ( UChar sorb,
2193 Int delta, Int szs, Int szd, Bool sign_extend )
2194 {
2195 UChar rm = getIByte(delta);
2196 if (epartIsReg(rm)) {
2197 if (szd == szs) {
2198 // mutant case. See #250799
2199 putIReg(szd, gregOfRM(rm),
2200 getIReg(szs,eregOfRM(rm)));
2201 } else {
2202 // normal case
2203 putIReg(szd, gregOfRM(rm),
2204 unop(mkWidenOp(szs,szd,sign_extend),
2205 getIReg(szs,eregOfRM(rm))));
2206 }
2207 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
2208 nameISize(szs), nameISize(szd),
2209 nameIReg(szs,eregOfRM(rm)),
2210 nameIReg(szd,gregOfRM(rm)));
2211 return 1+delta;
2212 }
2213
2214 /* E refers to memory */
2215 {
2216 Int len;
2217 HChar dis_buf[50];
2218 IRTemp addr = disAMode ( &len, sorb, delta, dis_buf );
2219 if (szd == szs) {
2220 // mutant case. See #250799
2221 putIReg(szd, gregOfRM(rm),
2222 loadLE(szToITy(szs),mkexpr(addr)));
2223 } else {
2224 // normal case
2225 putIReg(szd, gregOfRM(rm),
2226 unop(mkWidenOp(szs,szd,sign_extend),
2227 loadLE(szToITy(szs),mkexpr(addr))));
2228 }
2229 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
2230 nameISize(szs), nameISize(szd),
2231 dis_buf, nameIReg(szd,gregOfRM(rm)));
2232 return len+delta;
2233 }
2234 }
2235
2236
2237 /* Generate code to divide ArchRegs EDX:EAX / DX:AX / AX by the 32 /
2238 16 / 8 bit quantity in the given IRTemp. */
2239 static
codegen_div(Int sz,IRTemp t,Bool signed_divide)2240 void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
2241 {
2242 IROp op = signed_divide ? Iop_DivModS64to32 : Iop_DivModU64to32;
2243 IRTemp src64 = newTemp(Ity_I64);
2244 IRTemp dst64 = newTemp(Ity_I64);
2245 switch (sz) {
2246 case 4:
2247 assign( src64, binop(Iop_32HLto64,
2248 getIReg(4,R_EDX), getIReg(4,R_EAX)) );
2249 assign( dst64, binop(op, mkexpr(src64), mkexpr(t)) );
2250 putIReg( 4, R_EAX, unop(Iop_64to32,mkexpr(dst64)) );
2251 putIReg( 4, R_EDX, unop(Iop_64HIto32,mkexpr(dst64)) );
2252 break;
2253 case 2: {
2254 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
2255 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
2256 assign( src64, unop(widen3264,
2257 binop(Iop_16HLto32,
2258 getIReg(2,R_EDX), getIReg(2,R_EAX))) );
2259 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
2260 putIReg( 2, R_EAX, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
2261 putIReg( 2, R_EDX, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
2262 break;
2263 }
2264 case 1: {
2265 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
2266 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
2267 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16;
2268 assign( src64, unop(widen3264, unop(widen1632, getIReg(2,R_EAX))) );
2269 assign( dst64,
2270 binop(op, mkexpr(src64),
2271 unop(widen1632, unop(widen816, mkexpr(t)))) );
2272 putIReg( 1, R_AL, unop(Iop_16to8, unop(Iop_32to16,
2273 unop(Iop_64to32,mkexpr(dst64)))) );
2274 putIReg( 1, R_AH, unop(Iop_16to8, unop(Iop_32to16,
2275 unop(Iop_64HIto32,mkexpr(dst64)))) );
2276 break;
2277 }
2278 default: vpanic("codegen_div(x86)");
2279 }
2280 }
2281
2282
2283 static
dis_Grp1(UChar sorb,Bool locked,Int delta,UChar modrm,Int am_sz,Int d_sz,Int sz,UInt d32)2284 UInt dis_Grp1 ( UChar sorb, Bool locked,
2285 Int delta, UChar modrm,
2286 Int am_sz, Int d_sz, Int sz, UInt d32 )
2287 {
2288 Int len;
2289 HChar dis_buf[50];
2290 IRType ty = szToITy(sz);
2291 IRTemp dst1 = newTemp(ty);
2292 IRTemp src = newTemp(ty);
2293 IRTemp dst0 = newTemp(ty);
2294 IRTemp addr = IRTemp_INVALID;
2295 IROp op8 = Iop_INVALID;
2296 UInt mask = sz==1 ? 0xFF : (sz==2 ? 0xFFFF : 0xFFFFFFFF);
2297
2298 switch (gregOfRM(modrm)) {
2299 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break;
2300 case 2: break; // ADC
2301 case 3: break; // SBB
2302 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break;
2303 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break;
2304 /*NOTREACHED*/
2305 default: vpanic("dis_Grp1: unhandled case");
2306 }
2307
2308 if (epartIsReg(modrm)) {
2309 vassert(am_sz == 1);
2310
2311 assign(dst0, getIReg(sz,eregOfRM(modrm)));
2312 assign(src, mkU(ty,d32 & mask));
2313
2314 if (gregOfRM(modrm) == 2 /* ADC */) {
2315 helper_ADC( sz, dst1, dst0, src,
2316 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2317 } else
2318 if (gregOfRM(modrm) == 3 /* SBB */) {
2319 helper_SBB( sz, dst1, dst0, src,
2320 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
2321 } else {
2322 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
2323 if (isAddSub(op8))
2324 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2325 else
2326 setFlags_DEP1(op8, dst1, ty);
2327 }
2328
2329 if (gregOfRM(modrm) < 7)
2330 putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
2331
2332 delta += (am_sz + d_sz);
2333 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz), d32,
2334 nameIReg(sz,eregOfRM(modrm)));
2335 } else {
2336 addr = disAMode ( &len, sorb, delta, dis_buf);
2337
2338 assign(dst0, loadLE(ty,mkexpr(addr)));
2339 assign(src, mkU(ty,d32 & mask));
2340
2341 if (gregOfRM(modrm) == 2 /* ADC */) {
2342 if (locked) {
2343 /* cas-style store */
2344 helper_ADC( sz, dst1, dst0, src,
2345 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
2346 } else {
2347 /* normal store */
2348 helper_ADC( sz, dst1, dst0, src,
2349 /*store*/addr, IRTemp_INVALID, 0 );
2350 }
2351 } else
2352 if (gregOfRM(modrm) == 3 /* SBB */) {
2353 if (locked) {
2354 /* cas-style store */
2355 helper_SBB( sz, dst1, dst0, src,
2356 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
2357 } else {
2358 /* normal store */
2359 helper_SBB( sz, dst1, dst0, src,
2360 /*store*/addr, IRTemp_INVALID, 0 );
2361 }
2362 } else {
2363 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
2364 if (gregOfRM(modrm) < 7) {
2365 if (locked) {
2366 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
2367 mkexpr(dst1)/*newVal*/,
2368 guest_EIP_curr_instr );
2369 } else {
2370 storeLE(mkexpr(addr), mkexpr(dst1));
2371 }
2372 }
2373 if (isAddSub(op8))
2374 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2375 else
2376 setFlags_DEP1(op8, dst1, ty);
2377 }
2378
2379 delta += (len+d_sz);
2380 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz),
2381 d32, dis_buf);
2382 }
2383 return delta;
2384 }
2385
2386
2387 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed
2388 expression. */
2389
2390 static
dis_Grp2(UChar sorb,Int delta,UChar modrm,Int am_sz,Int d_sz,Int sz,IRExpr * shift_expr,const HChar * shift_expr_txt,Bool * decode_OK)2391 UInt dis_Grp2 ( UChar sorb,
2392 Int delta, UChar modrm,
2393 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
2394 const HChar* shift_expr_txt, Bool* decode_OK )
2395 {
2396 /* delta on entry points at the modrm byte. */
2397 HChar dis_buf[50];
2398 Int len;
2399 Bool isShift, isRotate, isRotateC;
2400 IRType ty = szToITy(sz);
2401 IRTemp dst0 = newTemp(ty);
2402 IRTemp dst1 = newTemp(ty);
2403 IRTemp addr = IRTemp_INVALID;
2404
2405 *decode_OK = True;
2406
2407 vassert(sz == 1 || sz == 2 || sz == 4);
2408
2409 /* Put value to shift/rotate in dst0. */
2410 if (epartIsReg(modrm)) {
2411 assign(dst0, getIReg(sz, eregOfRM(modrm)));
2412 delta += (am_sz + d_sz);
2413 } else {
2414 addr = disAMode ( &len, sorb, delta, dis_buf);
2415 assign(dst0, loadLE(ty,mkexpr(addr)));
2416 delta += len + d_sz;
2417 }
2418
2419 isShift = False;
2420 switch (gregOfRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; }
2421
2422 isRotate = False;
2423 switch (gregOfRM(modrm)) { case 0: case 1: isRotate = True; }
2424
2425 isRotateC = False;
2426 switch (gregOfRM(modrm)) { case 2: case 3: isRotateC = True; }
2427
2428 if (!isShift && !isRotate && !isRotateC) {
2429 /*NOTREACHED*/
2430 vpanic("dis_Grp2(Reg): unhandled case(x86)");
2431 }
2432
2433 if (isRotateC) {
2434 /* call a helper; these insns are so ridiculous they do not
2435 deserve better */
2436 Bool left = toBool(gregOfRM(modrm) == 2);
2437 IRTemp r64 = newTemp(Ity_I64);
2438 IRExpr** args
2439 = mkIRExprVec_4( widenUto32(mkexpr(dst0)), /* thing to rotate */
2440 widenUto32(shift_expr), /* rotate amount */
2441 widenUto32(mk_x86g_calculate_eflags_all()),
2442 mkU32(sz) );
2443 assign( r64, mkIRExprCCall(
2444 Ity_I64,
2445 0/*regparm*/,
2446 left ? "x86g_calculate_RCL" : "x86g_calculate_RCR",
2447 left ? &x86g_calculate_RCL : &x86g_calculate_RCR,
2448 args
2449 )
2450 );
2451 /* new eflags in hi half r64; new value in lo half r64 */
2452 assign( dst1, narrowTo(ty, unop(Iop_64to32, mkexpr(r64))) );
2453 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
2454 stmt( IRStmt_Put( OFFB_CC_DEP1, unop(Iop_64HIto32, mkexpr(r64)) ));
2455 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
2456 /* Set NDEP even though it isn't used. This makes redundant-PUT
2457 elimination of previous stores to this field work better. */
2458 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
2459 }
2460
2461 if (isShift) {
2462
2463 IRTemp pre32 = newTemp(Ity_I32);
2464 IRTemp res32 = newTemp(Ity_I32);
2465 IRTemp res32ss = newTemp(Ity_I32);
2466 IRTemp shift_amt = newTemp(Ity_I8);
2467 IROp op32;
2468
2469 switch (gregOfRM(modrm)) {
2470 case 4: op32 = Iop_Shl32; break;
2471 case 5: op32 = Iop_Shr32; break;
2472 case 6: op32 = Iop_Shl32; break;
2473 case 7: op32 = Iop_Sar32; break;
2474 /*NOTREACHED*/
2475 default: vpanic("dis_Grp2:shift"); break;
2476 }
2477
2478 /* Widen the value to be shifted to 32 bits, do the shift, and
2479 narrow back down. This seems surprisingly long-winded, but
2480 unfortunately the Intel semantics requires that 8/16-bit
2481 shifts give defined results for shift values all the way up
2482 to 31, and this seems the simplest way to do it. It has the
2483 advantage that the only IR level shifts generated are of 32
2484 bit values, and the shift amount is guaranteed to be in the
2485 range 0 .. 31, thereby observing the IR semantics requiring
2486 all shift values to be in the range 0 .. 2^word_size-1. */
2487
2488 /* shift_amt = shift_expr & 31, regardless of operation size */
2489 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(31)) );
2490
2491 /* suitably widen the value to be shifted to 32 bits. */
2492 assign( pre32, op32==Iop_Sar32 ? widenSto32(mkexpr(dst0))
2493 : widenUto32(mkexpr(dst0)) );
2494
2495 /* res32 = pre32 `shift` shift_amt */
2496 assign( res32, binop(op32, mkexpr(pre32), mkexpr(shift_amt)) );
2497
2498 /* res32ss = pre32 `shift` ((shift_amt - 1) & 31) */
2499 assign( res32ss,
2500 binop(op32,
2501 mkexpr(pre32),
2502 binop(Iop_And8,
2503 binop(Iop_Sub8,
2504 mkexpr(shift_amt), mkU8(1)),
2505 mkU8(31))) );
2506
2507 /* Build the flags thunk. */
2508 setFlags_DEP1_DEP2_shift(op32, res32, res32ss, ty, shift_amt);
2509
2510 /* Narrow the result back down. */
2511 assign( dst1, narrowTo(ty, mkexpr(res32)) );
2512
2513 } /* if (isShift) */
2514
2515 else
2516 if (isRotate) {
2517 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
2518 Bool left = toBool(gregOfRM(modrm) == 0);
2519 IRTemp rot_amt = newTemp(Ity_I8);
2520 IRTemp rot_amt32 = newTemp(Ity_I8);
2521 IRTemp oldFlags = newTemp(Ity_I32);
2522
2523 /* rot_amt = shift_expr & mask */
2524 /* By masking the rotate amount thusly, the IR-level Shl/Shr
2525 expressions never shift beyond the word size and thus remain
2526 well defined. */
2527 assign(rot_amt32, binop(Iop_And8, shift_expr, mkU8(31)));
2528
2529 if (ty == Ity_I32)
2530 assign(rot_amt, mkexpr(rot_amt32));
2531 else
2532 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt32), mkU8(8*sz-1)));
2533
2534 if (left) {
2535
2536 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
2537 assign(dst1,
2538 binop( mkSizedOp(ty,Iop_Or8),
2539 binop( mkSizedOp(ty,Iop_Shl8),
2540 mkexpr(dst0),
2541 mkexpr(rot_amt)
2542 ),
2543 binop( mkSizedOp(ty,Iop_Shr8),
2544 mkexpr(dst0),
2545 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
2546 )
2547 )
2548 );
2549 ccOp += X86G_CC_OP_ROLB;
2550
2551 } else { /* right */
2552
2553 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
2554 assign(dst1,
2555 binop( mkSizedOp(ty,Iop_Or8),
2556 binop( mkSizedOp(ty,Iop_Shr8),
2557 mkexpr(dst0),
2558 mkexpr(rot_amt)
2559 ),
2560 binop( mkSizedOp(ty,Iop_Shl8),
2561 mkexpr(dst0),
2562 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
2563 )
2564 )
2565 );
2566 ccOp += X86G_CC_OP_RORB;
2567
2568 }
2569
2570 /* dst1 now holds the rotated value. Build flag thunk. We
2571 need the resulting value for this, and the previous flags.
2572 Except don't set it if the rotate count is zero. */
2573
2574 assign(oldFlags, mk_x86g_calculate_eflags_all());
2575
2576 /* rot_amt32 :: Ity_I8. We need to convert it to I1. */
2577 IRTemp rot_amt32b = newTemp(Ity_I1);
2578 assign(rot_amt32b, binop(Iop_CmpNE8, mkexpr(rot_amt32), mkU8(0)) );
2579
2580 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
2581 stmt( IRStmt_Put( OFFB_CC_OP,
2582 IRExpr_ITE( mkexpr(rot_amt32b),
2583 mkU32(ccOp),
2584 IRExpr_Get(OFFB_CC_OP,Ity_I32) ) ));
2585 stmt( IRStmt_Put( OFFB_CC_DEP1,
2586 IRExpr_ITE( mkexpr(rot_amt32b),
2587 widenUto32(mkexpr(dst1)),
2588 IRExpr_Get(OFFB_CC_DEP1,Ity_I32) ) ));
2589 stmt( IRStmt_Put( OFFB_CC_DEP2,
2590 IRExpr_ITE( mkexpr(rot_amt32b),
2591 mkU32(0),
2592 IRExpr_Get(OFFB_CC_DEP2,Ity_I32) ) ));
2593 stmt( IRStmt_Put( OFFB_CC_NDEP,
2594 IRExpr_ITE( mkexpr(rot_amt32b),
2595 mkexpr(oldFlags),
2596 IRExpr_Get(OFFB_CC_NDEP,Ity_I32) ) ));
2597 } /* if (isRotate) */
2598
2599 /* Save result, and finish up. */
2600 if (epartIsReg(modrm)) {
2601 putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
2602 if (vex_traceflags & VEX_TRACE_FE) {
2603 vex_printf("%s%c ",
2604 nameGrp2(gregOfRM(modrm)), nameISize(sz) );
2605 if (shift_expr_txt)
2606 vex_printf("%s", shift_expr_txt);
2607 else
2608 ppIRExpr(shift_expr);
2609 vex_printf(", %s\n", nameIReg(sz,eregOfRM(modrm)));
2610 }
2611 } else {
2612 storeLE(mkexpr(addr), mkexpr(dst1));
2613 if (vex_traceflags & VEX_TRACE_FE) {
2614 vex_printf("%s%c ",
2615 nameGrp2(gregOfRM(modrm)), nameISize(sz) );
2616 if (shift_expr_txt)
2617 vex_printf("%s", shift_expr_txt);
2618 else
2619 ppIRExpr(shift_expr);
2620 vex_printf(", %s\n", dis_buf);
2621 }
2622 }
2623 return delta;
2624 }
2625
2626
2627 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
2628 static
dis_Grp8_Imm(UChar sorb,Bool locked,Int delta,UChar modrm,Int am_sz,Int sz,UInt src_val,Bool * decode_OK)2629 UInt dis_Grp8_Imm ( UChar sorb,
2630 Bool locked,
2631 Int delta, UChar modrm,
2632 Int am_sz, Int sz, UInt src_val,
2633 Bool* decode_OK )
2634 {
2635 /* src_val denotes a d8.
2636 And delta on entry points at the modrm byte. */
2637
2638 IRType ty = szToITy(sz);
2639 IRTemp t2 = newTemp(Ity_I32);
2640 IRTemp t2m = newTemp(Ity_I32);
2641 IRTemp t_addr = IRTemp_INVALID;
2642 HChar dis_buf[50];
2643 UInt mask;
2644
2645 /* we're optimists :-) */
2646 *decode_OK = True;
2647
2648 /* Limit src_val -- the bit offset -- to something within a word.
2649 The Intel docs say that literal offsets larger than a word are
2650 masked in this way. */
2651 switch (sz) {
2652 case 2: src_val &= 15; break;
2653 case 4: src_val &= 31; break;
2654 default: *decode_OK = False; return delta;
2655 }
2656
2657 /* Invent a mask suitable for the operation. */
2658 switch (gregOfRM(modrm)) {
2659 case 4: /* BT */ mask = 0; break;
2660 case 5: /* BTS */ mask = 1 << src_val; break;
2661 case 6: /* BTR */ mask = ~(1 << src_val); break;
2662 case 7: /* BTC */ mask = 1 << src_val; break;
2663 /* If this needs to be extended, probably simplest to make a
2664 new function to handle the other cases (0 .. 3). The
2665 Intel docs do however not indicate any use for 0 .. 3, so
2666 we don't expect this to happen. */
2667 default: *decode_OK = False; return delta;
2668 }
2669
2670 /* Fetch the value to be tested and modified into t2, which is
2671 32-bits wide regardless of sz. */
2672 if (epartIsReg(modrm)) {
2673 vassert(am_sz == 1);
2674 assign( t2, widenUto32(getIReg(sz, eregOfRM(modrm))) );
2675 delta += (am_sz + 1);
2676 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz),
2677 src_val, nameIReg(sz,eregOfRM(modrm)));
2678 } else {
2679 Int len;
2680 t_addr = disAMode ( &len, sorb, delta, dis_buf);
2681 delta += (len+1);
2682 assign( t2, widenUto32(loadLE(ty, mkexpr(t_addr))) );
2683 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz),
2684 src_val, dis_buf);
2685 }
2686
2687 /* Compute the new value into t2m, if non-BT. */
2688 switch (gregOfRM(modrm)) {
2689 case 4: /* BT */
2690 break;
2691 case 5: /* BTS */
2692 assign( t2m, binop(Iop_Or32, mkU32(mask), mkexpr(t2)) );
2693 break;
2694 case 6: /* BTR */
2695 assign( t2m, binop(Iop_And32, mkU32(mask), mkexpr(t2)) );
2696 break;
2697 case 7: /* BTC */
2698 assign( t2m, binop(Iop_Xor32, mkU32(mask), mkexpr(t2)) );
2699 break;
2700 default:
2701 /*NOTREACHED*/ /*the previous switch guards this*/
2702 vassert(0);
2703 }
2704
2705 /* Write the result back, if non-BT. If the CAS fails then we
2706 side-exit from the trace at this point, and so the flag state is
2707 not affected. This is of course as required. */
2708 if (gregOfRM(modrm) != 4 /* BT */) {
2709 if (epartIsReg(modrm)) {
2710 putIReg(sz, eregOfRM(modrm), narrowTo(ty, mkexpr(t2m)));
2711 } else {
2712 if (locked) {
2713 casLE( mkexpr(t_addr),
2714 narrowTo(ty, mkexpr(t2))/*expd*/,
2715 narrowTo(ty, mkexpr(t2m))/*new*/,
2716 guest_EIP_curr_instr );
2717 } else {
2718 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
2719 }
2720 }
2721 }
2722
2723 /* Copy relevant bit from t2 into the carry flag. */
2724 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
2725 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
2726 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
2727 stmt( IRStmt_Put(
2728 OFFB_CC_DEP1,
2729 binop(Iop_And32,
2730 binop(Iop_Shr32, mkexpr(t2), mkU8(src_val)),
2731 mkU32(1))
2732 ));
2733 /* Set NDEP even though it isn't used. This makes redundant-PUT
2734 elimination of previous stores to this field work better. */
2735 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
2736
2737 return delta;
2738 }
2739
2740
2741 /* Signed/unsigned widening multiply. Generate IR to multiply the
2742 value in EAX/AX/AL by the given IRTemp, and park the result in
2743 EDX:EAX/DX:AX/AX.
2744 */
codegen_mulL_A_D(Int sz,Bool syned,IRTemp tmp,const HChar * tmp_txt)2745 static void codegen_mulL_A_D ( Int sz, Bool syned,
2746 IRTemp tmp, const HChar* tmp_txt )
2747 {
2748 IRType ty = szToITy(sz);
2749 IRTemp t1 = newTemp(ty);
2750
2751 assign( t1, getIReg(sz, R_EAX) );
2752
2753 switch (ty) {
2754 case Ity_I32: {
2755 IRTemp res64 = newTemp(Ity_I64);
2756 IRTemp resHi = newTemp(Ity_I32);
2757 IRTemp resLo = newTemp(Ity_I32);
2758 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32;
2759 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
2760 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
2761 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
2762 assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
2763 assign( resLo, unop(Iop_64to32,mkexpr(res64)));
2764 putIReg(4, R_EDX, mkexpr(resHi));
2765 putIReg(4, R_EAX, mkexpr(resLo));
2766 break;
2767 }
2768 case Ity_I16: {
2769 IRTemp res32 = newTemp(Ity_I32);
2770 IRTemp resHi = newTemp(Ity_I16);
2771 IRTemp resLo = newTemp(Ity_I16);
2772 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16;
2773 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
2774 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
2775 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
2776 assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
2777 assign( resLo, unop(Iop_32to16,mkexpr(res32)));
2778 putIReg(2, R_EDX, mkexpr(resHi));
2779 putIReg(2, R_EAX, mkexpr(resLo));
2780 break;
2781 }
2782 case Ity_I8: {
2783 IRTemp res16 = newTemp(Ity_I16);
2784 IRTemp resHi = newTemp(Ity_I8);
2785 IRTemp resLo = newTemp(Ity_I8);
2786 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8;
2787 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
2788 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
2789 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
2790 assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
2791 assign( resLo, unop(Iop_16to8,mkexpr(res16)));
2792 putIReg(2, R_EAX, mkexpr(res16));
2793 break;
2794 }
2795 default:
2796 vpanic("codegen_mulL_A_D(x86)");
2797 }
2798 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
2799 }
2800
2801
2802 /* Group 3 extended opcodes. */
2803 static
dis_Grp3(UChar sorb,Bool locked,Int sz,Int delta,Bool * decode_OK)2804 UInt dis_Grp3 ( UChar sorb, Bool locked, Int sz, Int delta, Bool* decode_OK )
2805 {
2806 UInt d32;
2807 UChar modrm;
2808 HChar dis_buf[50];
2809 Int len;
2810 IRTemp addr;
2811 IRType ty = szToITy(sz);
2812 IRTemp t1 = newTemp(ty);
2813 IRTemp dst1, src, dst0;
2814
2815 *decode_OK = True; /* may change this later */
2816
2817 modrm = getIByte(delta);
2818
2819 if (locked && (gregOfRM(modrm) != 2 && gregOfRM(modrm) != 3)) {
2820 /* LOCK prefix only allowed with not and neg subopcodes */
2821 *decode_OK = False;
2822 return delta;
2823 }
2824
2825 if (epartIsReg(modrm)) {
2826 switch (gregOfRM(modrm)) {
2827 case 0: { /* TEST */
2828 delta++; d32 = getUDisp(sz, delta); delta += sz;
2829 dst1 = newTemp(ty);
2830 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
2831 getIReg(sz,eregOfRM(modrm)),
2832 mkU(ty,d32)));
2833 setFlags_DEP1( Iop_And8, dst1, ty );
2834 DIP("test%c $0x%x, %s\n", nameISize(sz), d32,
2835 nameIReg(sz, eregOfRM(modrm)));
2836 break;
2837 }
2838 case 1: /* UNDEFINED */
2839 /* The Intel docs imply this insn is undefined and binutils
2840 agrees. Unfortunately Core 2 will run it (with who
2841 knows what result?) sandpile.org reckons it's an alias
2842 for case 0. We play safe. */
2843 *decode_OK = False;
2844 break;
2845 case 2: /* NOT */
2846 delta++;
2847 putIReg(sz, eregOfRM(modrm),
2848 unop(mkSizedOp(ty,Iop_Not8),
2849 getIReg(sz, eregOfRM(modrm))));
2850 DIP("not%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
2851 break;
2852 case 3: /* NEG */
2853 delta++;
2854 dst0 = newTemp(ty);
2855 src = newTemp(ty);
2856 dst1 = newTemp(ty);
2857 assign(dst0, mkU(ty,0));
2858 assign(src, getIReg(sz,eregOfRM(modrm)));
2859 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), mkexpr(src)));
2860 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
2861 putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
2862 DIP("neg%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
2863 break;
2864 case 4: /* MUL (unsigned widening) */
2865 delta++;
2866 src = newTemp(ty);
2867 assign(src, getIReg(sz,eregOfRM(modrm)));
2868 codegen_mulL_A_D ( sz, False, src, nameIReg(sz,eregOfRM(modrm)) );
2869 break;
2870 case 5: /* IMUL (signed widening) */
2871 delta++;
2872 src = newTemp(ty);
2873 assign(src, getIReg(sz,eregOfRM(modrm)));
2874 codegen_mulL_A_D ( sz, True, src, nameIReg(sz,eregOfRM(modrm)) );
2875 break;
2876 case 6: /* DIV */
2877 delta++;
2878 assign( t1, getIReg(sz, eregOfRM(modrm)) );
2879 codegen_div ( sz, t1, False );
2880 DIP("div%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
2881 break;
2882 case 7: /* IDIV */
2883 delta++;
2884 assign( t1, getIReg(sz, eregOfRM(modrm)) );
2885 codegen_div ( sz, t1, True );
2886 DIP("idiv%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
2887 break;
2888 default:
2889 /* This can't happen - gregOfRM should return 0 .. 7 only */
2890 vpanic("Grp3(x86)");
2891 }
2892 } else {
2893 addr = disAMode ( &len, sorb, delta, dis_buf );
2894 t1 = newTemp(ty);
2895 delta += len;
2896 assign(t1, loadLE(ty,mkexpr(addr)));
2897 switch (gregOfRM(modrm)) {
2898 case 0: { /* TEST */
2899 d32 = getUDisp(sz, delta); delta += sz;
2900 dst1 = newTemp(ty);
2901 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
2902 mkexpr(t1), mkU(ty,d32)));
2903 setFlags_DEP1( Iop_And8, dst1, ty );
2904 DIP("test%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
2905 break;
2906 }
2907 case 1: /* UNDEFINED */
2908 /* See comment above on R case */
2909 *decode_OK = False;
2910 break;
2911 case 2: /* NOT */
2912 dst1 = newTemp(ty);
2913 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
2914 if (locked) {
2915 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
2916 guest_EIP_curr_instr );
2917 } else {
2918 storeLE( mkexpr(addr), mkexpr(dst1) );
2919 }
2920 DIP("not%c %s\n", nameISize(sz), dis_buf);
2921 break;
2922 case 3: /* NEG */
2923 dst0 = newTemp(ty);
2924 src = newTemp(ty);
2925 dst1 = newTemp(ty);
2926 assign(dst0, mkU(ty,0));
2927 assign(src, mkexpr(t1));
2928 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8),
2929 mkexpr(dst0), mkexpr(src)));
2930 if (locked) {
2931 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
2932 guest_EIP_curr_instr );
2933 } else {
2934 storeLE( mkexpr(addr), mkexpr(dst1) );
2935 }
2936 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
2937 DIP("neg%c %s\n", nameISize(sz), dis_buf);
2938 break;
2939 case 4: /* MUL */
2940 codegen_mulL_A_D ( sz, False, t1, dis_buf );
2941 break;
2942 case 5: /* IMUL */
2943 codegen_mulL_A_D ( sz, True, t1, dis_buf );
2944 break;
2945 case 6: /* DIV */
2946 codegen_div ( sz, t1, False );
2947 DIP("div%c %s\n", nameISize(sz), dis_buf);
2948 break;
2949 case 7: /* IDIV */
2950 codegen_div ( sz, t1, True );
2951 DIP("idiv%c %s\n", nameISize(sz), dis_buf);
2952 break;
2953 default:
2954 /* This can't happen - gregOfRM should return 0 .. 7 only */
2955 vpanic("Grp3(x86)");
2956 }
2957 }
2958 return delta;
2959 }
2960
2961
2962 /* Group 4 extended opcodes. */
2963 static
dis_Grp4(UChar sorb,Bool locked,Int delta,Bool * decode_OK)2964 UInt dis_Grp4 ( UChar sorb, Bool locked, Int delta, Bool* decode_OK )
2965 {
2966 Int alen;
2967 UChar modrm;
2968 HChar dis_buf[50];
2969 IRType ty = Ity_I8;
2970 IRTemp t1 = newTemp(ty);
2971 IRTemp t2 = newTemp(ty);
2972
2973 *decode_OK = True;
2974
2975 modrm = getIByte(delta);
2976
2977 if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
2978 /* LOCK prefix only allowed with inc and dec subopcodes */
2979 *decode_OK = False;
2980 return delta;
2981 }
2982
2983 if (epartIsReg(modrm)) {
2984 assign(t1, getIReg(1, eregOfRM(modrm)));
2985 switch (gregOfRM(modrm)) {
2986 case 0: /* INC */
2987 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
2988 putIReg(1, eregOfRM(modrm), mkexpr(t2));
2989 setFlags_INC_DEC( True, t2, ty );
2990 break;
2991 case 1: /* DEC */
2992 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
2993 putIReg(1, eregOfRM(modrm), mkexpr(t2));
2994 setFlags_INC_DEC( False, t2, ty );
2995 break;
2996 default:
2997 *decode_OK = False;
2998 return delta;
2999 }
3000 delta++;
3001 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)),
3002 nameIReg(1, eregOfRM(modrm)));
3003 } else {
3004 IRTemp addr = disAMode ( &alen, sorb, delta, dis_buf );
3005 assign( t1, loadLE(ty, mkexpr(addr)) );
3006 switch (gregOfRM(modrm)) {
3007 case 0: /* INC */
3008 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
3009 if (locked) {
3010 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
3011 guest_EIP_curr_instr );
3012 } else {
3013 storeLE( mkexpr(addr), mkexpr(t2) );
3014 }
3015 setFlags_INC_DEC( True, t2, ty );
3016 break;
3017 case 1: /* DEC */
3018 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
3019 if (locked) {
3020 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
3021 guest_EIP_curr_instr );
3022 } else {
3023 storeLE( mkexpr(addr), mkexpr(t2) );
3024 }
3025 setFlags_INC_DEC( False, t2, ty );
3026 break;
3027 default:
3028 *decode_OK = False;
3029 return delta;
3030 }
3031 delta += alen;
3032 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)), dis_buf);
3033 }
3034 return delta;
3035 }
3036
3037
3038 /* Group 5 extended opcodes. */
3039 static
dis_Grp5(UChar sorb,Bool locked,Int sz,Int delta,DisResult * dres,Bool * decode_OK)3040 UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta,
3041 /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
3042 {
3043 Int len;
3044 UChar modrm;
3045 HChar dis_buf[50];
3046 IRTemp addr = IRTemp_INVALID;
3047 IRType ty = szToITy(sz);
3048 IRTemp t1 = newTemp(ty);
3049 IRTemp t2 = IRTemp_INVALID;
3050
3051 *decode_OK = True;
3052
3053 modrm = getIByte(delta);
3054
3055 if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
3056 /* LOCK prefix only allowed with inc and dec subopcodes */
3057 *decode_OK = False;
3058 return delta;
3059 }
3060
3061 if (epartIsReg(modrm)) {
3062 assign(t1, getIReg(sz,eregOfRM(modrm)));
3063 switch (gregOfRM(modrm)) {
3064 case 0: /* INC */
3065 vassert(sz == 2 || sz == 4);
3066 t2 = newTemp(ty);
3067 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
3068 mkexpr(t1), mkU(ty,1)));
3069 setFlags_INC_DEC( True, t2, ty );
3070 putIReg(sz,eregOfRM(modrm),mkexpr(t2));
3071 break;
3072 case 1: /* DEC */
3073 vassert(sz == 2 || sz == 4);
3074 t2 = newTemp(ty);
3075 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
3076 mkexpr(t1), mkU(ty,1)));
3077 setFlags_INC_DEC( False, t2, ty );
3078 putIReg(sz,eregOfRM(modrm),mkexpr(t2));
3079 break;
3080 case 2: /* call Ev */
3081 vassert(sz == 4);
3082 t2 = newTemp(Ity_I32);
3083 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
3084 putIReg(4, R_ESP, mkexpr(t2));
3085 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+1));
3086 jmp_treg(dres, Ijk_Call, t1);
3087 vassert(dres->whatNext == Dis_StopHere);
3088 break;
3089 case 4: /* jmp Ev */
3090 vassert(sz == 4);
3091 jmp_treg(dres, Ijk_Boring, t1);
3092 vassert(dres->whatNext == Dis_StopHere);
3093 break;
3094 case 6: /* PUSH Ev */
3095 vassert(sz == 4 || sz == 2);
3096 t2 = newTemp(Ity_I32);
3097 assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
3098 putIReg(4, R_ESP, mkexpr(t2) );
3099 storeLE( mkexpr(t2), mkexpr(t1) );
3100 break;
3101 default:
3102 *decode_OK = False;
3103 return delta;
3104 }
3105 delta++;
3106 DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
3107 nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
3108 } else {
3109 addr = disAMode ( &len, sorb, delta, dis_buf );
3110 assign(t1, loadLE(ty,mkexpr(addr)));
3111 switch (gregOfRM(modrm)) {
3112 case 0: /* INC */
3113 t2 = newTemp(ty);
3114 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
3115 mkexpr(t1), mkU(ty,1)));
3116 if (locked) {
3117 casLE( mkexpr(addr),
3118 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
3119 } else {
3120 storeLE(mkexpr(addr),mkexpr(t2));
3121 }
3122 setFlags_INC_DEC( True, t2, ty );
3123 break;
3124 case 1: /* DEC */
3125 t2 = newTemp(ty);
3126 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
3127 mkexpr(t1), mkU(ty,1)));
3128 if (locked) {
3129 casLE( mkexpr(addr),
3130 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
3131 } else {
3132 storeLE(mkexpr(addr),mkexpr(t2));
3133 }
3134 setFlags_INC_DEC( False, t2, ty );
3135 break;
3136 case 2: /* call Ev */
3137 vassert(sz == 4);
3138 t2 = newTemp(Ity_I32);
3139 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
3140 putIReg(4, R_ESP, mkexpr(t2));
3141 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+len));
3142 jmp_treg(dres, Ijk_Call, t1);
3143 vassert(dres->whatNext == Dis_StopHere);
3144 break;
3145 case 4: /* JMP Ev */
3146 vassert(sz == 4);
3147 jmp_treg(dres, Ijk_Boring, t1);
3148 vassert(dres->whatNext == Dis_StopHere);
3149 break;
3150 case 6: /* PUSH Ev */
3151 vassert(sz == 4 || sz == 2);
3152 t2 = newTemp(Ity_I32);
3153 assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
3154 putIReg(4, R_ESP, mkexpr(t2) );
3155 storeLE( mkexpr(t2), mkexpr(t1) );
3156 break;
3157 default:
3158 *decode_OK = False;
3159 return delta;
3160 }
3161 delta += len;
3162 DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
3163 nameISize(sz), dis_buf);
3164 }
3165 return delta;
3166 }
3167
3168
3169 /*------------------------------------------------------------*/
3170 /*--- Disassembling string ops (including REP prefixes) ---*/
3171 /*------------------------------------------------------------*/
3172
3173 /* Code shared by all the string ops */
3174 static
dis_string_op_increment(Int sz,IRTemp t_inc)3175 void dis_string_op_increment(Int sz, IRTemp t_inc)
3176 {
3177 if (sz == 4 || sz == 2) {
3178 assign( t_inc,
3179 binop(Iop_Shl32, IRExpr_Get( OFFB_DFLAG, Ity_I32 ),
3180 mkU8(sz/2) ) );
3181 } else {
3182 assign( t_inc,
3183 IRExpr_Get( OFFB_DFLAG, Ity_I32 ) );
3184 }
3185 }
3186
3187 static
dis_string_op(void (* dis_OP)(Int,IRTemp),Int sz,const HChar * name,UChar sorb)3188 void dis_string_op( void (*dis_OP)( Int, IRTemp ),
3189 Int sz, const HChar* name, UChar sorb )
3190 {
3191 IRTemp t_inc = newTemp(Ity_I32);
3192 vassert(sorb == 0); /* hmm. so what was the point of passing it in? */
3193 dis_string_op_increment(sz, t_inc);
3194 dis_OP( sz, t_inc );
3195 DIP("%s%c\n", name, nameISize(sz));
3196 }
3197
3198 static
dis_MOVS(Int sz,IRTemp t_inc)3199 void dis_MOVS ( Int sz, IRTemp t_inc )
3200 {
3201 IRType ty = szToITy(sz);
3202 IRTemp td = newTemp(Ity_I32); /* EDI */
3203 IRTemp ts = newTemp(Ity_I32); /* ESI */
3204
3205 assign( td, getIReg(4, R_EDI) );
3206 assign( ts, getIReg(4, R_ESI) );
3207
3208 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
3209
3210 putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3211 putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
3212 }
3213
3214 static
dis_LODS(Int sz,IRTemp t_inc)3215 void dis_LODS ( Int sz, IRTemp t_inc )
3216 {
3217 IRType ty = szToITy(sz);
3218 IRTemp ts = newTemp(Ity_I32); /* ESI */
3219
3220 assign( ts, getIReg(4, R_ESI) );
3221
3222 putIReg( sz, R_EAX, loadLE(ty, mkexpr(ts)) );
3223
3224 putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
3225 }
3226
3227 static
dis_STOS(Int sz,IRTemp t_inc)3228 void dis_STOS ( Int sz, IRTemp t_inc )
3229 {
3230 IRType ty = szToITy(sz);
3231 IRTemp ta = newTemp(ty); /* EAX */
3232 IRTemp td = newTemp(Ity_I32); /* EDI */
3233
3234 assign( ta, getIReg(sz, R_EAX) );
3235 assign( td, getIReg(4, R_EDI) );
3236
3237 storeLE( mkexpr(td), mkexpr(ta) );
3238
3239 putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3240 }
3241
3242 static
dis_CMPS(Int sz,IRTemp t_inc)3243 void dis_CMPS ( Int sz, IRTemp t_inc )
3244 {
3245 IRType ty = szToITy(sz);
3246 IRTemp tdv = newTemp(ty); /* (EDI) */
3247 IRTemp tsv = newTemp(ty); /* (ESI) */
3248 IRTemp td = newTemp(Ity_I32); /* EDI */
3249 IRTemp ts = newTemp(Ity_I32); /* ESI */
3250
3251 assign( td, getIReg(4, R_EDI) );
3252 assign( ts, getIReg(4, R_ESI) );
3253
3254 assign( tdv, loadLE(ty,mkexpr(td)) );
3255 assign( tsv, loadLE(ty,mkexpr(ts)) );
3256
3257 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
3258
3259 putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3260 putIReg(4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
3261 }
3262
3263 static
dis_SCAS(Int sz,IRTemp t_inc)3264 void dis_SCAS ( Int sz, IRTemp t_inc )
3265 {
3266 IRType ty = szToITy(sz);
3267 IRTemp ta = newTemp(ty); /* EAX */
3268 IRTemp td = newTemp(Ity_I32); /* EDI */
3269 IRTemp tdv = newTemp(ty); /* (EDI) */
3270
3271 assign( ta, getIReg(sz, R_EAX) );
3272 assign( td, getIReg(4, R_EDI) );
3273
3274 assign( tdv, loadLE(ty,mkexpr(td)) );
3275 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
3276
3277 putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
3278 }
3279
3280
3281 /* Wrap the appropriate string op inside a REP/REPE/REPNE.
3282 We assume the insn is the last one in the basic block, and so emit a jump
3283 to the next insn, rather than just falling through. */
3284 static
dis_REP_op(DisResult * dres,X86Condcode cond,void (* dis_OP)(Int,IRTemp),Int sz,Addr32 eip,Addr32 eip_next,const HChar * name)3285 void dis_REP_op ( /*MOD*/DisResult* dres,
3286 X86Condcode cond,
3287 void (*dis_OP)(Int, IRTemp),
3288 Int sz, Addr32 eip, Addr32 eip_next, const HChar* name )
3289 {
3290 IRTemp t_inc = newTemp(Ity_I32);
3291 IRTemp tc = newTemp(Ity_I32); /* ECX */
3292
3293 assign( tc, getIReg(4,R_ECX) );
3294
3295 stmt( IRStmt_Exit( binop(Iop_CmpEQ32,mkexpr(tc),mkU32(0)),
3296 Ijk_Boring,
3297 IRConst_U32(eip_next), OFFB_EIP ) );
3298
3299 putIReg(4, R_ECX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
3300
3301 dis_string_op_increment(sz, t_inc);
3302 dis_OP (sz, t_inc);
3303
3304 if (cond == X86CondAlways) {
3305 jmp_lit(dres, Ijk_Boring, eip);
3306 vassert(dres->whatNext == Dis_StopHere);
3307 } else {
3308 stmt( IRStmt_Exit( mk_x86g_calculate_condition(cond),
3309 Ijk_Boring,
3310 IRConst_U32(eip), OFFB_EIP ) );
3311 jmp_lit(dres, Ijk_Boring, eip_next);
3312 vassert(dres->whatNext == Dis_StopHere);
3313 }
3314 DIP("%s%c\n", name, nameISize(sz));
3315 }
3316
3317
3318 /*------------------------------------------------------------*/
3319 /*--- Arithmetic, etc. ---*/
3320 /*------------------------------------------------------------*/
3321
3322 /* IMUL E, G. Supplied eip points to the modR/M byte. */
3323 static
dis_mul_E_G(UChar sorb,Int size,Int delta0)3324 UInt dis_mul_E_G ( UChar sorb,
3325 Int size,
3326 Int delta0 )
3327 {
3328 Int alen;
3329 HChar dis_buf[50];
3330 UChar rm = getIByte(delta0);
3331 IRType ty = szToITy(size);
3332 IRTemp te = newTemp(ty);
3333 IRTemp tg = newTemp(ty);
3334 IRTemp resLo = newTemp(ty);
3335
3336 assign( tg, getIReg(size, gregOfRM(rm)) );
3337 if (epartIsReg(rm)) {
3338 assign( te, getIReg(size, eregOfRM(rm)) );
3339 } else {
3340 IRTemp addr = disAMode( &alen, sorb, delta0, dis_buf );
3341 assign( te, loadLE(ty,mkexpr(addr)) );
3342 }
3343
3344 setFlags_MUL ( ty, te, tg, X86G_CC_OP_SMULB );
3345
3346 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
3347
3348 putIReg(size, gregOfRM(rm), mkexpr(resLo) );
3349
3350 if (epartIsReg(rm)) {
3351 DIP("imul%c %s, %s\n", nameISize(size),
3352 nameIReg(size,eregOfRM(rm)),
3353 nameIReg(size,gregOfRM(rm)));
3354 return 1+delta0;
3355 } else {
3356 DIP("imul%c %s, %s\n", nameISize(size),
3357 dis_buf, nameIReg(size,gregOfRM(rm)));
3358 return alen+delta0;
3359 }
3360 }
3361
3362
3363 /* IMUL I * E -> G. Supplied eip points to the modR/M byte. */
3364 static
dis_imul_I_E_G(UChar sorb,Int size,Int delta,Int litsize)3365 UInt dis_imul_I_E_G ( UChar sorb,
3366 Int size,
3367 Int delta,
3368 Int litsize )
3369 {
3370 Int d32, alen;
3371 HChar dis_buf[50];
3372 UChar rm = getIByte(delta);
3373 IRType ty = szToITy(size);
3374 IRTemp te = newTemp(ty);
3375 IRTemp tl = newTemp(ty);
3376 IRTemp resLo = newTemp(ty);
3377
3378 vassert(size == 1 || size == 2 || size == 4);
3379
3380 if (epartIsReg(rm)) {
3381 assign(te, getIReg(size, eregOfRM(rm)));
3382 delta++;
3383 } else {
3384 IRTemp addr = disAMode( &alen, sorb, delta, dis_buf );
3385 assign(te, loadLE(ty, mkexpr(addr)));
3386 delta += alen;
3387 }
3388 d32 = getSDisp(litsize,delta);
3389 delta += litsize;
3390
3391 if (size == 1) d32 &= 0xFF;
3392 if (size == 2) d32 &= 0xFFFF;
3393
3394 assign(tl, mkU(ty,d32));
3395
3396 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));
3397
3398 setFlags_MUL ( ty, te, tl, X86G_CC_OP_SMULB );
3399
3400 putIReg(size, gregOfRM(rm), mkexpr(resLo));
3401
3402 DIP("imul %d, %s, %s\n", d32,
3403 ( epartIsReg(rm) ? nameIReg(size,eregOfRM(rm)) : dis_buf ),
3404 nameIReg(size,gregOfRM(rm)) );
3405 return delta;
3406 }
3407
3408
3409 /* Generate an IR sequence to do a count-leading-zeroes operation on
3410 the supplied IRTemp, and return a new IRTemp holding the result.
3411 'ty' may be Ity_I16 or Ity_I32 only. In the case where the
3412 argument is zero, return the number of bits in the word (the
3413 natural semantics). */
gen_LZCNT(IRType ty,IRTemp src)3414 static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
3415 {
3416 vassert(ty == Ity_I32 || ty == Ity_I16);
3417
3418 IRTemp src32 = newTemp(Ity_I32);
3419 assign(src32, widenUto32( mkexpr(src) ));
3420
3421 IRTemp src32x = newTemp(Ity_I32);
3422 assign(src32x,
3423 binop(Iop_Shl32, mkexpr(src32),
3424 mkU8(32 - 8 * sizeofIRType(ty))));
3425
3426 // Clz32 has undefined semantics when its input is zero, so
3427 // special-case around that.
3428 IRTemp res32 = newTemp(Ity_I32);
3429 assign(res32,
3430 IRExpr_ITE(
3431 binop(Iop_CmpEQ32, mkexpr(src32x), mkU32(0)),
3432 mkU32(8 * sizeofIRType(ty)),
3433 unop(Iop_Clz32, mkexpr(src32x))
3434 ));
3435
3436 IRTemp res = newTemp(ty);
3437 assign(res, narrowTo(ty, mkexpr(res32)));
3438 return res;
3439 }
3440
3441
3442 /*------------------------------------------------------------*/
3443 /*--- ---*/
3444 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/
3445 /*--- ---*/
3446 /*------------------------------------------------------------*/
3447
3448 /* --- Helper functions for dealing with the register stack. --- */
3449
3450 /* --- Set the emulation-warning pseudo-register. --- */
3451
/* Emit a Put of 'e' to the guest state at OFFB_EMNOTE.  'e' must have
   type Ity_I32; this is asserted. */
static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
{
   IRStmt* put;

   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);

   put = IRStmt_Put( OFFB_EMNOTE, e );
   stmt( put );
}
3457
3458 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
3459
mkQNaN64(void)3460 static IRExpr* mkQNaN64 ( void )
3461 {
3462 /* QNaN is 0 2047 1 0(51times)
3463 == 0b 11111111111b 1 0(51times)
3464 == 0x7FF8 0000 0000 0000
3465 */
3466 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
3467 }
3468
3469 /* --------- Get/put the top-of-stack pointer. --------- */
3470
get_ftop(void)3471 static IRExpr* get_ftop ( void )
3472 {
3473 return IRExpr_Get( OFFB_FTOP, Ity_I32 );
3474 }
3475
/* Emit a Put of 'e' to the guest state at OFFB_FTOP.  'e' must have
   type Ity_I32; this is asserted. */
static void put_ftop ( IRExpr* e )
{
   IRStmt* put;

   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);

   put = IRStmt_Put( OFFB_FTOP, e );
   stmt( put );
}
3481
3482 /* --------- Get/put the C3210 bits. --------- */
3483
get_C3210(void)3484 static IRExpr* get_C3210 ( void )
3485 {
3486 return IRExpr_Get( OFFB_FC3210, Ity_I32 );
3487 }
3488
/* Emit a Put of 'e' to the guest state at OFFB_FC3210.  The type of
   'e' is not checked here. */
static void put_C3210 ( IRExpr* e )
{
   IRStmt* put = IRStmt_Put( OFFB_FC3210, e );
   stmt( put );
}
3493
3494 /* --------- Get/put the FPU rounding mode. --------- */
get_fpround(void)3495 static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
3496 {
3497 return IRExpr_Get( OFFB_FPROUND, Ity_I32 );
3498 }
3499
/* Emit a Put of 'e' to the guest state at OFFB_FPROUND.  The type of
   'e' is not checked here. */
static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
{
   IRStmt* put = IRStmt_Put( OFFB_FPROUND, e );
   stmt( put );
}
3504
3505
3506 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
3507 /* Produces a value in 0 .. 3, which is encoded as per the type
3508 IRRoundingMode. Since the guest_FPROUND value is also encoded as
3509 per IRRoundingMode, we merely need to get it and mask it for
3510 safety.
3511 */
get_roundingmode(void)3512 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
3513 {
3514 return binop( Iop_And32, get_fpround(), mkU32(3) );
3515 }
3516
get_FAKE_roundingmode(void)3517 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
3518 {
3519 return mkU32(Irrm_NEAREST);
3520 }
3521
3522
3523 /* --------- Get/set FP register tag bytes. --------- */
3524
3525 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
3526
put_ST_TAG(Int i,IRExpr * value)3527 static void put_ST_TAG ( Int i, IRExpr* value )
3528 {
3529 IRRegArray* descr;
3530 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
3531 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
3532 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
3533 }
3534
3535 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
3536 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
3537
get_ST_TAG(Int i)3538 static IRExpr* get_ST_TAG ( Int i )
3539 {
3540 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
3541 return IRExpr_GetI( descr, get_ftop(), i );
3542 }
3543
3544
3545 /* --------- Get/set FP registers. --------- */
3546
3547 /* Given i, and some expression e, emit 'ST(i) = e' and set the
3548 register's tag to indicate the register is full. The previous
3549 state of the register is not checked. */
3550
put_ST_UNCHECKED(Int i,IRExpr * value)3551 static void put_ST_UNCHECKED ( Int i, IRExpr* value )
3552 {
3553 IRRegArray* descr;
3554 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
3555 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
3556 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
3557 /* Mark the register as in-use. */
3558 put_ST_TAG(i, mkU8(1));
3559 }
3560
3561 /* Given i, and some expression e, emit
3562 ST(i) = is_full(i) ? NaN : e
3563 and set the tag accordingly.
3564 */
3565
put_ST(Int i,IRExpr * value)3566 static void put_ST ( Int i, IRExpr* value )
3567 {
3568 put_ST_UNCHECKED(
3569 i,
3570 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
3571 /* non-0 means full */
3572 mkQNaN64(),
3573 /* 0 means empty */
3574 value
3575 )
3576 );
3577 }
3578
3579
3580 /* Given i, generate an expression yielding 'ST(i)'. */
3581
get_ST_UNCHECKED(Int i)3582 static IRExpr* get_ST_UNCHECKED ( Int i )
3583 {
3584 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
3585 return IRExpr_GetI( descr, get_ftop(), i );
3586 }
3587
3588
3589 /* Given i, generate an expression yielding
3590 is_full(i) ? ST(i) : NaN
3591 */
3592
get_ST(Int i)3593 static IRExpr* get_ST ( Int i )
3594 {
3595 return
3596 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
3597 /* non-0 means full */
3598 get_ST_UNCHECKED(i),
3599 /* 0 means empty */
3600 mkQNaN64());
3601 }
3602
3603
3604 /* Given i, and some expression e, and a condition cond, generate IR
3605 which has the same effect as put_ST(i,e) when cond is true and has
3606 no effect when cond is false. Given the lack of proper
3607 if-then-else in the IR, this is pretty tricky.
3608 */
3609
maybe_put_ST(IRTemp cond,Int i,IRExpr * value)3610 static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value )
3611 {
3612 // new_tag = if cond then FULL else old_tag
3613 // new_val = if cond then (if old_tag==FULL then NaN else val)
3614 // else old_val
3615
3616 IRTemp old_tag = newTemp(Ity_I8);
3617 assign(old_tag, get_ST_TAG(i));
3618 IRTemp new_tag = newTemp(Ity_I8);
3619 assign(new_tag,
3620 IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag)));
3621
3622 IRTemp old_val = newTemp(Ity_F64);
3623 assign(old_val, get_ST_UNCHECKED(i));
3624 IRTemp new_val = newTemp(Ity_F64);
3625 assign(new_val,
3626 IRExpr_ITE(mkexpr(cond),
3627 IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)),
3628 /* non-0 means full */
3629 mkQNaN64(),
3630 /* 0 means empty */
3631 value),
3632 mkexpr(old_val)));
3633
3634 put_ST_UNCHECKED(i, mkexpr(new_val));
3635 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So
3636 // now set it to new_tag instead.
3637 put_ST_TAG(i, mkexpr(new_tag));
3638 }
3639
3640 /* Adjust FTOP downwards by one register. */
3641
fp_push(void)3642 static void fp_push ( void )
3643 {
3644 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
3645 }
3646
3647 /* Adjust FTOP downwards by one register when COND is 1:I1. Else
3648 don't change it. */
3649
maybe_fp_push(IRTemp cond)3650 static void maybe_fp_push ( IRTemp cond )
3651 {
3652 put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) );
3653 }
3654
3655 /* Adjust FTOP upwards by one register, and mark the vacated register
3656 as empty. */
3657
fp_pop(void)3658 static void fp_pop ( void )
3659 {
3660 put_ST_TAG(0, mkU8(0));
3661 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
3662 }
3663
3664 /* Set the C2 bit of the FPU status register to e[0]. Assumes that
3665 e[31:1] == 0.
3666 */
set_C2(IRExpr * e)3667 static void set_C2 ( IRExpr* e )
3668 {
3669 IRExpr* cleared = binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2));
3670 put_C3210( binop(Iop_Or32,
3671 cleared,
3672 binop(Iop_Shl32, e, mkU8(X86G_FC_SHIFT_C2))) );
3673 }
3674
3675 /* Generate code to check that abs(d64) < 2^63 and is finite. This is
3676 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The
3677 test is simple, but the derivation of it is not so simple.
3678
3679 The exponent field for an IEEE754 double is 11 bits. That means it
3680 can take values 0 through 0x7FF. If the exponent has value 0x7FF,
3681 the number is either a NaN or an Infinity and so is not finite.
3682 Furthermore, a finite value of exactly 2^63 is the smallest value
3683 that has exponent value 0x43E. Hence, what we need to do is
3684 extract the exponent, ignoring the sign bit and mantissa, and check
3685 it is < 0x43E, or <= 0x43D.
3686
3687 To make this easily applicable to 32- and 64-bit targets, a
3688 roundabout approach is used. First the number is converted to I64,
3689 then the top 32 bits are taken. Shifting them right by 20 bits
3690 places the sign bit and exponent in the bottom 12 bits. Anding
3691 with 0x7FF gets rid of the sign bit, leaving just the exponent
3692 available for comparison.
3693 */
math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(IRTemp d64)3694 static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 )
3695 {
3696 IRTemp i64 = newTemp(Ity_I64);
3697 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) );
3698 IRTemp exponent = newTemp(Ity_I32);
3699 assign(exponent,
3700 binop(Iop_And32,
3701 binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)),
3702 mkU32(0x7FF)));
3703 IRTemp in_range_and_finite = newTemp(Ity_I1);
3704 assign(in_range_and_finite,
3705 binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D)));
3706 return in_range_and_finite;
3707 }
3708
3709 /* Invent a plausible-looking FPU status word value:
3710 ((ftop & 7) << 11) | (c3210 & 0x4700)
3711 */
get_FPU_sw(void)3712 static IRExpr* get_FPU_sw ( void )
3713 {
3714 return
3715 unop(Iop_32to16,
3716 binop(Iop_Or32,
3717 binop(Iop_Shl32,
3718 binop(Iop_And32, get_ftop(), mkU32(7)),
3719 mkU8(11)),
3720 binop(Iop_And32, get_C3210(), mkU32(0x4700))
3721 ));
3722 }
3723
3724
3725 /* ------------------------------------------------------- */
3726 /* Given all that stack-mangling junk, we can now go ahead
3727 and describe FP instructions.
3728 */
3729
3730 /* ST(0) = ST(0) `op` mem64/32(addr)
3731 Need to check ST(0)'s tag on read, but not on write.
3732 */
3733 static
fp_do_op_mem_ST_0(IRTemp addr,const HChar * op_txt,HChar * dis_buf,IROp op,Bool dbl)3734 void fp_do_op_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
3735 IROp op, Bool dbl )
3736 {
3737 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
3738 if (dbl) {
3739 put_ST_UNCHECKED(0,
3740 triop( op,
3741 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3742 get_ST(0),
3743 loadLE(Ity_F64,mkexpr(addr))
3744 ));
3745 } else {
3746 put_ST_UNCHECKED(0,
3747 triop( op,
3748 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3749 get_ST(0),
3750 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
3751 ));
3752 }
3753 }
3754
3755
3756 /* ST(0) = mem64/32(addr) `op` ST(0)
3757 Need to check ST(0)'s tag on read, but not on write.
3758 */
3759 static
fp_do_oprev_mem_ST_0(IRTemp addr,const HChar * op_txt,HChar * dis_buf,IROp op,Bool dbl)3760 void fp_do_oprev_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
3761 IROp op, Bool dbl )
3762 {
3763 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
3764 if (dbl) {
3765 put_ST_UNCHECKED(0,
3766 triop( op,
3767 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3768 loadLE(Ity_F64,mkexpr(addr)),
3769 get_ST(0)
3770 ));
3771 } else {
3772 put_ST_UNCHECKED(0,
3773 triop( op,
3774 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3775 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
3776 get_ST(0)
3777 ));
3778 }
3779 }
3780
3781
3782 /* ST(dst) = ST(dst) `op` ST(src).
3783 Check dst and src tags when reading but not on write.
3784 */
3785 static
fp_do_op_ST_ST(const HChar * op_txt,IROp op,UInt st_src,UInt st_dst,Bool pop_after)3786 void fp_do_op_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
3787 Bool pop_after )
3788 {
3789 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"",
3790 st_src, st_dst);
3791 put_ST_UNCHECKED(
3792 st_dst,
3793 triop( op,
3794 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3795 get_ST(st_dst),
3796 get_ST(st_src) )
3797 );
3798 if (pop_after)
3799 fp_pop();
3800 }
3801
3802 /* ST(dst) = ST(src) `op` ST(dst).
3803 Check dst and src tags when reading but not on write.
3804 */
3805 static
fp_do_oprev_ST_ST(const HChar * op_txt,IROp op,UInt st_src,UInt st_dst,Bool pop_after)3806 void fp_do_oprev_ST_ST ( const HChar* op_txt, IROp op, UInt st_src,
3807 UInt st_dst, Bool pop_after )
3808 {
3809 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"",
3810 st_src, st_dst);
3811 put_ST_UNCHECKED(
3812 st_dst,
3813 triop( op,
3814 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3815 get_ST(st_src),
3816 get_ST(st_dst) )
3817 );
3818 if (pop_after)
3819 fp_pop();
3820 }
3821
3822 /* %eflags(Z,P,C) = UCOMI( st(0), st(i) ) */
fp_do_ucomi_ST0_STi(UInt i,Bool pop_after)3823 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
3824 {
3825 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
3826 /* This is a bit of a hack (and isn't really right). It sets
3827 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
3828 documentation implies A and S are unchanged.
3829 */
3830 /* It's also fishy in that it is used both for COMIP and
3831 UCOMIP, and they aren't the same (although similar). */
3832 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
3833 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
3834 stmt( IRStmt_Put( OFFB_CC_DEP1,
3835 binop( Iop_And32,
3836 binop(Iop_CmpF64, get_ST(0), get_ST(i)),
3837 mkU32(0x45)
3838 )));
3839 /* Set NDEP even though it isn't used. This makes redundant-PUT
3840 elimination of previous stores to this field work better. */
3841 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
3842 if (pop_after)
3843 fp_pop();
3844 }
3845
3846
3847 static
dis_FPU(Bool * decode_ok,UChar sorb,Int delta)3848 UInt dis_FPU ( Bool* decode_ok, UChar sorb, Int delta )
3849 {
3850 Int len;
3851 UInt r_src, r_dst;
3852 HChar dis_buf[50];
3853 IRTemp t1, t2;
3854
3855 /* On entry, delta points at the second byte of the insn (the modrm
3856 byte).*/
3857 UChar first_opcode = getIByte(delta-1);
3858 UChar modrm = getIByte(delta+0);
3859
3860 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
3861
3862 if (first_opcode == 0xD8) {
3863 if (modrm < 0xC0) {
3864
3865 /* bits 5,4,3 are an opcode extension, and the modRM also
3866 specifies an address. */
3867 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
3868 delta += len;
3869
3870 switch (gregOfRM(modrm)) {
3871
3872 case 0: /* FADD single-real */
3873 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
3874 break;
3875
3876 case 1: /* FMUL single-real */
3877 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
3878 break;
3879
3880 case 2: /* FCOM single-real */
3881 DIP("fcoms %s\n", dis_buf);
3882 /* This forces C1 to zero, which isn't right. */
3883 put_C3210(
3884 binop( Iop_And32,
3885 binop(Iop_Shl32,
3886 binop(Iop_CmpF64,
3887 get_ST(0),
3888 unop(Iop_F32toF64,
3889 loadLE(Ity_F32,mkexpr(addr)))),
3890 mkU8(8)),
3891 mkU32(0x4500)
3892 ));
3893 break;
3894
3895 case 3: /* FCOMP single-real */
3896 DIP("fcomps %s\n", dis_buf);
3897 /* This forces C1 to zero, which isn't right. */
3898 put_C3210(
3899 binop( Iop_And32,
3900 binop(Iop_Shl32,
3901 binop(Iop_CmpF64,
3902 get_ST(0),
3903 unop(Iop_F32toF64,
3904 loadLE(Ity_F32,mkexpr(addr)))),
3905 mkU8(8)),
3906 mkU32(0x4500)
3907 ));
3908 fp_pop();
3909 break;
3910
3911 case 4: /* FSUB single-real */
3912 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
3913 break;
3914
3915 case 5: /* FSUBR single-real */
3916 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
3917 break;
3918
3919 case 6: /* FDIV single-real */
3920 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
3921 break;
3922
3923 case 7: /* FDIVR single-real */
3924 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
3925 break;
3926
3927 default:
3928 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
3929 vex_printf("first_opcode == 0xD8\n");
3930 goto decode_fail;
3931 }
3932 } else {
3933 delta++;
3934 switch (modrm) {
3935
3936 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
3937 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
3938 break;
3939
3940 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
3941 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
3942 break;
3943
3944 /* Dunno if this is right */
3945 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
3946 r_dst = (UInt)modrm - 0xD0;
3947 DIP("fcom %%st(0),%%st(%u)\n", r_dst);
3948 /* This forces C1 to zero, which isn't right. */
3949 put_C3210(
3950 binop( Iop_And32,
3951 binop(Iop_Shl32,
3952 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
3953 mkU8(8)),
3954 mkU32(0x4500)
3955 ));
3956 break;
3957
3958 /* Dunno if this is right */
3959 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
3960 r_dst = (UInt)modrm - 0xD8;
3961 DIP("fcomp %%st(0),%%st(%u)\n", r_dst);
3962 /* This forces C1 to zero, which isn't right. */
3963 put_C3210(
3964 binop( Iop_And32,
3965 binop(Iop_Shl32,
3966 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
3967 mkU8(8)),
3968 mkU32(0x4500)
3969 ));
3970 fp_pop();
3971 break;
3972
3973 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
3974 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
3975 break;
3976
3977 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
3978 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
3979 break;
3980
3981 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
3982 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
3983 break;
3984
3985 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
3986 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
3987 break;
3988
3989 default:
3990 goto decode_fail;
3991 }
3992 }
3993 }
3994
3995 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
3996 else
3997 if (first_opcode == 0xD9) {
3998 if (modrm < 0xC0) {
3999
4000 /* bits 5,4,3 are an opcode extension, and the modRM also
4001 specifies an address. */
4002 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4003 delta += len;
4004
4005 switch (gregOfRM(modrm)) {
4006
4007 case 0: /* FLD single-real */
4008 DIP("flds %s\n", dis_buf);
4009 fp_push();
4010 put_ST(0, unop(Iop_F32toF64,
4011 loadLE(Ity_F32, mkexpr(addr))));
4012 break;
4013
4014 case 2: /* FST single-real */
4015 DIP("fsts %s\n", dis_buf);
4016 storeLE(mkexpr(addr),
4017 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
4018 break;
4019
4020 case 3: /* FSTP single-real */
4021 DIP("fstps %s\n", dis_buf);
4022 storeLE(mkexpr(addr),
4023 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
4024 fp_pop();
4025 break;
4026
4027 case 4: { /* FLDENV m28 */
/* Uses dirty helper:
   VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State*, HWord ) */
4030 IRTemp ew = newTemp(Ity_I32);
4031 IRDirty* d = unsafeIRDirty_0_N (
4032 0/*regparms*/,
4033 "x86g_dirtyhelper_FLDENV",
4034 &x86g_dirtyhelper_FLDENV,
4035 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
4036 );
4037 d->tmp = ew;
4038 /* declare we're reading memory */
4039 d->mFx = Ifx_Read;
4040 d->mAddr = mkexpr(addr);
4041 d->mSize = 28;
4042
4043 /* declare we're writing guest state */
4044 d->nFxState = 4;
4045 vex_bzero(&d->fxState, sizeof(d->fxState));
4046
4047 d->fxState[0].fx = Ifx_Write;
4048 d->fxState[0].offset = OFFB_FTOP;
4049 d->fxState[0].size = sizeof(UInt);
4050
4051 d->fxState[1].fx = Ifx_Write;
4052 d->fxState[1].offset = OFFB_FPTAGS;
4053 d->fxState[1].size = 8 * sizeof(UChar);
4054
4055 d->fxState[2].fx = Ifx_Write;
4056 d->fxState[2].offset = OFFB_FPROUND;
4057 d->fxState[2].size = sizeof(UInt);
4058
4059 d->fxState[3].fx = Ifx_Write;
4060 d->fxState[3].offset = OFFB_FC3210;
4061 d->fxState[3].size = sizeof(UInt);
4062
4063 stmt( IRStmt_Dirty(d) );
4064
4065 /* ew contains any emulation warning we may need to
4066 issue. If needed, side-exit to the next insn,
4067 reporting the warning, so that Valgrind's dispatcher
4068 sees the warning. */
4069 put_emwarn( mkexpr(ew) );
4070 stmt(
4071 IRStmt_Exit(
4072 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
4073 Ijk_EmWarn,
4074 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
4075 OFFB_EIP
4076 )
4077 );
4078
4079 DIP("fldenv %s\n", dis_buf);
4080 break;
4081 }
4082
4083 case 5: {/* FLDCW */
4084 /* The only thing we observe in the control word is the
4085 rounding mode. Therefore, pass the 16-bit value
4086 (x87 native-format control word) to a clean helper,
4087 getting back a 64-bit value, the lower half of which
4088 is the FPROUND value to store, and the upper half of
4089 which is the emulation-warning token which may be
4090 generated.
4091 */
/* ULong x86g_check_fldcw ( UInt ); */
4093 IRTemp t64 = newTemp(Ity_I64);
4094 IRTemp ew = newTemp(Ity_I32);
4095 DIP("fldcw %s\n", dis_buf);
4096 assign( t64, mkIRExprCCall(
4097 Ity_I64, 0/*regparms*/,
4098 "x86g_check_fldcw",
4099 &x86g_check_fldcw,
4100 mkIRExprVec_1(
4101 unop( Iop_16Uto32,
4102 loadLE(Ity_I16, mkexpr(addr)))
4103 )
4104 )
4105 );
4106
4107 put_fpround( unop(Iop_64to32, mkexpr(t64)) );
4108 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
4109 put_emwarn( mkexpr(ew) );
4110 /* Finally, if an emulation warning was reported,
4111 side-exit to the next insn, reporting the warning,
4112 so that Valgrind's dispatcher sees the warning. */
4113 stmt(
4114 IRStmt_Exit(
4115 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
4116 Ijk_EmWarn,
4117 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
4118 OFFB_EIP
4119 )
4120 );
4121 break;
4122 }
4123
4124 case 6: { /* FNSTENV m28 */
/* Uses dirty helper:
   void x86g_dirtyhelper_FSTENV ( VexGuestX86State*, HWord ) */
4127 IRDirty* d = unsafeIRDirty_0_N (
4128 0/*regparms*/,
4129 "x86g_dirtyhelper_FSTENV",
4130 &x86g_dirtyhelper_FSTENV,
4131 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
4132 );
4133 /* declare we're writing memory */
4134 d->mFx = Ifx_Write;
4135 d->mAddr = mkexpr(addr);
4136 d->mSize = 28;
4137
4138 /* declare we're reading guest state */
4139 d->nFxState = 4;
4140 vex_bzero(&d->fxState, sizeof(d->fxState));
4141
4142 d->fxState[0].fx = Ifx_Read;
4143 d->fxState[0].offset = OFFB_FTOP;
4144 d->fxState[0].size = sizeof(UInt);
4145
4146 d->fxState[1].fx = Ifx_Read;
4147 d->fxState[1].offset = OFFB_FPTAGS;
4148 d->fxState[1].size = 8 * sizeof(UChar);
4149
4150 d->fxState[2].fx = Ifx_Read;
4151 d->fxState[2].offset = OFFB_FPROUND;
4152 d->fxState[2].size = sizeof(UInt);
4153
4154 d->fxState[3].fx = Ifx_Read;
4155 d->fxState[3].offset = OFFB_FC3210;
4156 d->fxState[3].size = sizeof(UInt);
4157
4158 stmt( IRStmt_Dirty(d) );
4159
4160 DIP("fnstenv %s\n", dis_buf);
4161 break;
4162 }
4163
4164 case 7: /* FNSTCW */
4165 /* Fake up a native x87 FPU control word. The only
4166 thing it depends on is FPROUND[1:0], so call a clean
4167 helper to cook it up. */
/* UInt x86g_create_fpucw ( UInt fpround ) */
4169 DIP("fnstcw %s\n", dis_buf);
4170 storeLE(
4171 mkexpr(addr),
4172 unop( Iop_32to16,
4173 mkIRExprCCall(
4174 Ity_I32, 0/*regp*/,
4175 "x86g_create_fpucw", &x86g_create_fpucw,
4176 mkIRExprVec_1( get_fpround() )
4177 )
4178 )
4179 );
4180 break;
4181
4182 default:
4183 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
4184 vex_printf("first_opcode == 0xD9\n");
4185 goto decode_fail;
4186 }
4187
4188 } else {
4189 delta++;
4190 switch (modrm) {
4191
4192 case 0xC0 ... 0xC7: /* FLD %st(?) */
4193 r_src = (UInt)modrm - 0xC0;
4194 DIP("fld %%st(%u)\n", r_src);
4195 t1 = newTemp(Ity_F64);
4196 assign(t1, get_ST(r_src));
4197 fp_push();
4198 put_ST(0, mkexpr(t1));
4199 break;
4200
4201 case 0xC8 ... 0xCF: /* FXCH %st(?) */
4202 r_src = (UInt)modrm - 0xC8;
4203 DIP("fxch %%st(%u)\n", r_src);
4204 t1 = newTemp(Ity_F64);
4205 t2 = newTemp(Ity_F64);
4206 assign(t1, get_ST(0));
4207 assign(t2, get_ST(r_src));
4208 put_ST_UNCHECKED(0, mkexpr(t2));
4209 put_ST_UNCHECKED(r_src, mkexpr(t1));
4210 break;
4211
4212 case 0xE0: /* FCHS */
4213 DIP("fchs\n");
4214 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
4215 break;
4216
4217 case 0xE1: /* FABS */
4218 DIP("fabs\n");
4219 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
4220 break;
4221
4222 case 0xE4: /* FTST */
4223 DIP("ftst\n");
4224 /* This forces C1 to zero, which isn't right. */
4225 /* Well, in fact the Intel docs say (bizarrely): "C1 is
4226 set to 0 if stack underflow occurred; otherwise, set
4227 to 0" which is pretty nonsensical. I guess it's a
4228 typo. */
4229 put_C3210(
4230 binop( Iop_And32,
4231 binop(Iop_Shl32,
4232 binop(Iop_CmpF64,
4233 get_ST(0),
4234 IRExpr_Const(IRConst_F64i(0x0ULL))),
4235 mkU8(8)),
4236 mkU32(0x4500)
4237 ));
4238 break;
4239
4240 case 0xE5: { /* FXAM */
4241 /* This is an interesting one. It examines %st(0),
4242 regardless of whether the tag says it's empty or not.
4243 Here, just pass both the tag (in our format) and the
4244 value (as a double, actually a ULong) to a helper
4245 function. */
4246 IRExpr** args
4247 = mkIRExprVec_2( unop(Iop_8Uto32, get_ST_TAG(0)),
4248 unop(Iop_ReinterpF64asI64,
4249 get_ST_UNCHECKED(0)) );
4250 put_C3210(mkIRExprCCall(
4251 Ity_I32,
4252 0/*regparm*/,
4253 "x86g_calculate_FXAM", &x86g_calculate_FXAM,
4254 args
4255 ));
4256 DIP("fxam\n");
4257 break;
4258 }
4259
4260 case 0xE8: /* FLD1 */
4261 DIP("fld1\n");
4262 fp_push();
4263 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
4264 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
4265 break;
4266
4267 case 0xE9: /* FLDL2T */
4268 DIP("fldl2t\n");
4269 fp_push();
4270 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
4271 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
4272 break;
4273
4274 case 0xEA: /* FLDL2E */
4275 DIP("fldl2e\n");
4276 fp_push();
4277 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
4278 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
4279 break;
4280
4281 case 0xEB: /* FLDPI */
4282 DIP("fldpi\n");
4283 fp_push();
4284 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
4285 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
4286 break;
4287
4288 case 0xEC: /* FLDLG2 */
4289 DIP("fldlg2\n");
4290 fp_push();
4291 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
4292 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
4293 break;
4294
4295 case 0xED: /* FLDLN2 */
4296 DIP("fldln2\n");
4297 fp_push();
4298 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
4299 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
4300 break;
4301
4302 case 0xEE: /* FLDZ */
4303 DIP("fldz\n");
4304 fp_push();
4305 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
4306 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
4307 break;
4308
4309 case 0xF0: /* F2XM1 */
4310 DIP("f2xm1\n");
4311 put_ST_UNCHECKED(0,
4312 binop(Iop_2xm1F64,
4313 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4314 get_ST(0)));
4315 break;
4316
4317 case 0xF1: /* FYL2X */
4318 DIP("fyl2x\n");
4319 put_ST_UNCHECKED(1,
4320 triop(Iop_Yl2xF64,
4321 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4322 get_ST(1),
4323 get_ST(0)));
4324 fp_pop();
4325 break;
4326
4327 case 0xF2: { /* FPTAN */
4328 DIP("fptan\n");
4329 IRTemp argD = newTemp(Ity_F64);
4330 assign(argD, get_ST(0));
4331 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
4332 IRTemp resD = newTemp(Ity_F64);
4333 assign(resD,
4334 IRExpr_ITE(
4335 mkexpr(argOK),
4336 binop(Iop_TanF64,
4337 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4338 mkexpr(argD)),
4339 mkexpr(argD))
4340 );
4341 put_ST_UNCHECKED(0, mkexpr(resD));
4342 /* Conditionally push 1.0 on the stack, if the arg is
4343 in range */
4344 maybe_fp_push(argOK);
4345 maybe_put_ST(argOK, 0,
4346 IRExpr_Const(IRConst_F64(1.0)));
4347 set_C2( binop(Iop_Xor32,
4348 unop(Iop_1Uto32, mkexpr(argOK)),
4349 mkU32(1)) );
4350 break;
4351 }
4352
4353 case 0xF3: /* FPATAN */
4354 DIP("fpatan\n");
4355 put_ST_UNCHECKED(1,
4356 triop(Iop_AtanF64,
4357 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4358 get_ST(1),
4359 get_ST(0)));
4360 fp_pop();
4361 break;
4362
4363 case 0xF4: { /* FXTRACT */
4364 IRTemp argF = newTemp(Ity_F64);
4365 IRTemp sigF = newTemp(Ity_F64);
4366 IRTemp expF = newTemp(Ity_F64);
4367 IRTemp argI = newTemp(Ity_I64);
4368 IRTemp sigI = newTemp(Ity_I64);
4369 IRTemp expI = newTemp(Ity_I64);
4370 DIP("fxtract\n");
4371 assign( argF, get_ST(0) );
4372 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
4373 assign( sigI,
4374 mkIRExprCCall(
4375 Ity_I64, 0/*regparms*/,
4376 "x86amd64g_calculate_FXTRACT",
4377 &x86amd64g_calculate_FXTRACT,
4378 mkIRExprVec_2( mkexpr(argI),
4379 mkIRExpr_HWord(0)/*sig*/ ))
4380 );
4381 assign( expI,
4382 mkIRExprCCall(
4383 Ity_I64, 0/*regparms*/,
4384 "x86amd64g_calculate_FXTRACT",
4385 &x86amd64g_calculate_FXTRACT,
4386 mkIRExprVec_2( mkexpr(argI),
4387 mkIRExpr_HWord(1)/*exp*/ ))
4388 );
4389 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
4390 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
4391 /* exponent */
4392 put_ST_UNCHECKED(0, mkexpr(expF) );
4393 fp_push();
4394 /* significand */
4395 put_ST(0, mkexpr(sigF) );
4396 break;
4397 }
4398
4399 case 0xF5: { /* FPREM1 -- IEEE compliant */
4400 IRTemp a1 = newTemp(Ity_F64);
4401 IRTemp a2 = newTemp(Ity_F64);
4402 DIP("fprem1\n");
4403 /* Do FPREM1 twice, once to get the remainder, and once
4404 to get the C3210 flag values. */
4405 assign( a1, get_ST(0) );
4406 assign( a2, get_ST(1) );
4407 put_ST_UNCHECKED(0,
4408 triop(Iop_PRem1F64,
4409 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4410 mkexpr(a1),
4411 mkexpr(a2)));
4412 put_C3210(
4413 triop(Iop_PRem1C3210F64,
4414 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4415 mkexpr(a1),
4416 mkexpr(a2)) );
4417 break;
4418 }
4419
4420 case 0xF7: /* FINCSTP */
4421 DIP("fprem\n");
4422 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
4423 break;
4424
4425 case 0xF8: { /* FPREM -- not IEEE compliant */
4426 IRTemp a1 = newTemp(Ity_F64);
4427 IRTemp a2 = newTemp(Ity_F64);
4428 DIP("fprem\n");
4429 /* Do FPREM twice, once to get the remainder, and once
4430 to get the C3210 flag values. */
4431 assign( a1, get_ST(0) );
4432 assign( a2, get_ST(1) );
4433 put_ST_UNCHECKED(0,
4434 triop(Iop_PRemF64,
4435 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4436 mkexpr(a1),
4437 mkexpr(a2)));
4438 put_C3210(
4439 triop(Iop_PRemC3210F64,
4440 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4441 mkexpr(a1),
4442 mkexpr(a2)) );
4443 break;
4444 }
4445
4446 case 0xF9: /* FYL2XP1 */
4447 DIP("fyl2xp1\n");
4448 put_ST_UNCHECKED(1,
4449 triop(Iop_Yl2xp1F64,
4450 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4451 get_ST(1),
4452 get_ST(0)));
4453 fp_pop();
4454 break;
4455
4456 case 0xFA: /* FSQRT */
4457 DIP("fsqrt\n");
4458 put_ST_UNCHECKED(0,
4459 binop(Iop_SqrtF64,
4460 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4461 get_ST(0)));
4462 break;
4463
4464 case 0xFB: { /* FSINCOS */
4465 DIP("fsincos\n");
4466 IRTemp argD = newTemp(Ity_F64);
4467 assign(argD, get_ST(0));
4468 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
4469 IRTemp resD = newTemp(Ity_F64);
4470 assign(resD,
4471 IRExpr_ITE(
4472 mkexpr(argOK),
4473 binop(Iop_SinF64,
4474 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4475 mkexpr(argD)),
4476 mkexpr(argD))
4477 );
4478 put_ST_UNCHECKED(0, mkexpr(resD));
4479 /* Conditionally push the cos value on the stack, if
4480 the arg is in range */
4481 maybe_fp_push(argOK);
4482 maybe_put_ST(argOK, 0,
4483 binop(Iop_CosF64,
4484 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4485 mkexpr(argD)));
4486 set_C2( binop(Iop_Xor32,
4487 unop(Iop_1Uto32, mkexpr(argOK)),
4488 mkU32(1)) );
4489 break;
4490 }
4491
4492 case 0xFC: /* FRNDINT */
4493 DIP("frndint\n");
4494 put_ST_UNCHECKED(0,
4495 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
4496 break;
4497
4498 case 0xFD: /* FSCALE */
4499 DIP("fscale\n");
4500 put_ST_UNCHECKED(0,
4501 triop(Iop_ScaleF64,
4502 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4503 get_ST(0),
4504 get_ST(1)));
4505 break;
4506
4507 case 0xFE: /* FSIN */
4508 case 0xFF: { /* FCOS */
4509 Bool isSIN = modrm == 0xFE;
4510 DIP("%s\n", isSIN ? "fsin" : "fcos");
4511 IRTemp argD = newTemp(Ity_F64);
4512 assign(argD, get_ST(0));
4513 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
4514 IRTemp resD = newTemp(Ity_F64);
4515 assign(resD,
4516 IRExpr_ITE(
4517 mkexpr(argOK),
4518 binop(isSIN ? Iop_SinF64 : Iop_CosF64,
4519 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4520 mkexpr(argD)),
4521 mkexpr(argD))
4522 );
4523 put_ST_UNCHECKED(0, mkexpr(resD));
4524 set_C2( binop(Iop_Xor32,
4525 unop(Iop_1Uto32, mkexpr(argOK)),
4526 mkU32(1)) );
4527 break;
4528 }
4529
4530 default:
4531 goto decode_fail;
4532 }
4533 }
4534 }
4535
4536 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
4537 else
4538 if (first_opcode == 0xDA) {
4539
4540 if (modrm < 0xC0) {
4541
4542 /* bits 5,4,3 are an opcode extension, and the modRM also
4543 specifies an address. */
4544 IROp fop;
4545 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4546 delta += len;
4547 switch (gregOfRM(modrm)) {
4548
4549 case 0: /* FIADD m32int */ /* ST(0) += m32int */
4550 DIP("fiaddl %s\n", dis_buf);
4551 fop = Iop_AddF64;
4552 goto do_fop_m32;
4553
4554 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
4555 DIP("fimull %s\n", dis_buf);
4556 fop = Iop_MulF64;
4557 goto do_fop_m32;
4558
4559 case 2: /* FICOM m32int */
4560 DIP("ficoml %s\n", dis_buf);
4561 /* This forces C1 to zero, which isn't right. */
4562 put_C3210(
4563 binop( Iop_And32,
4564 binop(Iop_Shl32,
4565 binop(Iop_CmpF64,
4566 get_ST(0),
4567 unop(Iop_I32StoF64,
4568 loadLE(Ity_I32,mkexpr(addr)))),
4569 mkU8(8)),
4570 mkU32(0x4500)
4571 ));
4572 break;
4573
4574 case 3: /* FICOMP m32int */
4575 DIP("ficompl %s\n", dis_buf);
4576 /* This forces C1 to zero, which isn't right. */
4577 put_C3210(
4578 binop( Iop_And32,
4579 binop(Iop_Shl32,
4580 binop(Iop_CmpF64,
4581 get_ST(0),
4582 unop(Iop_I32StoF64,
4583 loadLE(Ity_I32,mkexpr(addr)))),
4584 mkU8(8)),
4585 mkU32(0x4500)
4586 ));
4587 fp_pop();
4588 break;
4589
4590 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
4591 DIP("fisubl %s\n", dis_buf);
4592 fop = Iop_SubF64;
4593 goto do_fop_m32;
4594
4595 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
4596 DIP("fisubrl %s\n", dis_buf);
4597 fop = Iop_SubF64;
4598 goto do_foprev_m32;
4599
4600 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
4601 DIP("fidivl %s\n", dis_buf);
4602 fop = Iop_DivF64;
4603 goto do_fop_m32;
4604
4605 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
4606 DIP("fidivrl %s\n", dis_buf);
4607 fop = Iop_DivF64;
4608 goto do_foprev_m32;
4609
4610 do_fop_m32:
4611 put_ST_UNCHECKED(0,
4612 triop(fop,
4613 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4614 get_ST(0),
4615 unop(Iop_I32StoF64,
4616 loadLE(Ity_I32, mkexpr(addr)))));
4617 break;
4618
4619 do_foprev_m32:
4620 put_ST_UNCHECKED(0,
4621 triop(fop,
4622 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4623 unop(Iop_I32StoF64,
4624 loadLE(Ity_I32, mkexpr(addr))),
4625 get_ST(0)));
4626 break;
4627
4628 default:
4629 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
4630 vex_printf("first_opcode == 0xDA\n");
4631 goto decode_fail;
4632 }
4633
4634 } else {
4635
4636 delta++;
4637 switch (modrm) {
4638
4639 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
4640 r_src = (UInt)modrm - 0xC0;
4641 DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
4642 put_ST_UNCHECKED(0,
4643 IRExpr_ITE(
4644 mk_x86g_calculate_condition(X86CondB),
4645 get_ST(r_src), get_ST(0)) );
4646 break;
4647
4648 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
4649 r_src = (UInt)modrm - 0xC8;
4650 DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
4651 put_ST_UNCHECKED(0,
4652 IRExpr_ITE(
4653 mk_x86g_calculate_condition(X86CondZ),
4654 get_ST(r_src), get_ST(0)) );
4655 break;
4656
4657 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
4658 r_src = (UInt)modrm - 0xD0;
4659 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
4660 put_ST_UNCHECKED(0,
4661 IRExpr_ITE(
4662 mk_x86g_calculate_condition(X86CondBE),
4663 get_ST(r_src), get_ST(0)) );
4664 break;
4665
4666 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
4667 r_src = (UInt)modrm - 0xD8;
4668 DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
4669 put_ST_UNCHECKED(0,
4670 IRExpr_ITE(
4671 mk_x86g_calculate_condition(X86CondP),
4672 get_ST(r_src), get_ST(0)) );
4673 break;
4674
4675 case 0xE9: /* FUCOMPP %st(0),%st(1) */
4676 DIP("fucompp %%st(0),%%st(1)\n");
4677 /* This forces C1 to zero, which isn't right. */
4678 put_C3210(
4679 binop( Iop_And32,
4680 binop(Iop_Shl32,
4681 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
4682 mkU8(8)),
4683 mkU32(0x4500)
4684 ));
4685 fp_pop();
4686 fp_pop();
4687 break;
4688
4689 default:
4690 goto decode_fail;
4691 }
4692
4693 }
4694 }
4695
4696 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
4697 else
4698 if (first_opcode == 0xDB) {
4699 if (modrm < 0xC0) {
4700
4701 /* bits 5,4,3 are an opcode extension, and the modRM also
4702 specifies an address. */
4703 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4704 delta += len;
4705
4706 switch (gregOfRM(modrm)) {
4707
4708 case 0: /* FILD m32int */
4709 DIP("fildl %s\n", dis_buf);
4710 fp_push();
4711 put_ST(0, unop(Iop_I32StoF64,
4712 loadLE(Ity_I32, mkexpr(addr))));
4713 break;
4714
4715 case 1: /* FISTTPL m32 (SSE3) */
4716 DIP("fisttpl %s\n", dis_buf);
4717 storeLE( mkexpr(addr),
4718 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
4719 fp_pop();
4720 break;
4721
4722 case 2: /* FIST m32 */
4723 DIP("fistl %s\n", dis_buf);
4724 storeLE( mkexpr(addr),
4725 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
4726 break;
4727
4728 case 3: /* FISTP m32 */
4729 DIP("fistpl %s\n", dis_buf);
4730 storeLE( mkexpr(addr),
4731 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
4732 fp_pop();
4733 break;
4734
4735 case 5: { /* FLD extended-real */
/* Uses dirty helper:
      ULong x86g_dirtyhelper_loadF80le ( UInt )
   addr holds the address.  First, do a dirty call to
   get hold of the data. */
4740 IRTemp val = newTemp(Ity_I64);
4741 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
4742
4743 IRDirty* d = unsafeIRDirty_1_N (
4744 val,
4745 0/*regparms*/,
4746 "x86g_dirtyhelper_loadF80le",
4747 &x86g_dirtyhelper_loadF80le,
4748 args
4749 );
4750 /* declare that we're reading memory */
4751 d->mFx = Ifx_Read;
4752 d->mAddr = mkexpr(addr);
4753 d->mSize = 10;
4754
4755 /* execute the dirty call, dumping the result in val. */
4756 stmt( IRStmt_Dirty(d) );
4757 fp_push();
4758 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
4759
4760 DIP("fldt %s\n", dis_buf);
4761 break;
4762 }
4763
4764 case 7: { /* FSTP extended-real */
4765 /* Uses dirty helper: void x86g_storeF80le ( UInt, ULong ) */
4766 IRExpr** args
4767 = mkIRExprVec_2( mkexpr(addr),
4768 unop(Iop_ReinterpF64asI64, get_ST(0)) );
4769
4770 IRDirty* d = unsafeIRDirty_0_N (
4771 0/*regparms*/,
4772 "x86g_dirtyhelper_storeF80le",
4773 &x86g_dirtyhelper_storeF80le,
4774 args
4775 );
4776 /* declare we're writing memory */
4777 d->mFx = Ifx_Write;
4778 d->mAddr = mkexpr(addr);
4779 d->mSize = 10;
4780
4781 /* execute the dirty call. */
4782 stmt( IRStmt_Dirty(d) );
4783 fp_pop();
4784
4785 DIP("fstpt\n %s", dis_buf);
4786 break;
4787 }
4788
4789 default:
4790 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
4791 vex_printf("first_opcode == 0xDB\n");
4792 goto decode_fail;
4793 }
4794
4795 } else {
4796
4797 delta++;
4798 switch (modrm) {
4799
4800 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
4801 r_src = (UInt)modrm - 0xC0;
4802 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src);
4803 put_ST_UNCHECKED(0,
4804 IRExpr_ITE(
4805 mk_x86g_calculate_condition(X86CondNB),
4806 get_ST(r_src), get_ST(0)) );
4807 break;
4808
4809 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
4810 r_src = (UInt)modrm - 0xC8;
4811 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src);
4812 put_ST_UNCHECKED(0,
4813 IRExpr_ITE(
4814 mk_x86g_calculate_condition(X86CondNZ),
4815 get_ST(r_src), get_ST(0)) );
4816 break;
4817
4818 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
4819 r_src = (UInt)modrm - 0xD0;
4820 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src);
4821 put_ST_UNCHECKED(0,
4822 IRExpr_ITE(
4823 mk_x86g_calculate_condition(X86CondNBE),
4824 get_ST(r_src), get_ST(0)) );
4825 break;
4826
4827 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
4828 r_src = (UInt)modrm - 0xD8;
4829 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src);
4830 put_ST_UNCHECKED(0,
4831 IRExpr_ITE(
4832 mk_x86g_calculate_condition(X86CondNP),
4833 get_ST(r_src), get_ST(0)) );
4834 break;
4835
4836 case 0xE2:
4837 DIP("fnclex\n");
4838 break;
4839
4840 case 0xE3: {
/* Uses dirty helper:
   void x86g_dirtyhelper_FINIT ( VexGuestX86State* ) */
4843 IRDirty* d = unsafeIRDirty_0_N (
4844 0/*regparms*/,
4845 "x86g_dirtyhelper_FINIT",
4846 &x86g_dirtyhelper_FINIT,
4847 mkIRExprVec_1(IRExpr_GSPTR())
4848 );
4849
4850 /* declare we're writing guest state */
4851 d->nFxState = 5;
4852 vex_bzero(&d->fxState, sizeof(d->fxState));
4853
4854 d->fxState[0].fx = Ifx_Write;
4855 d->fxState[0].offset = OFFB_FTOP;
4856 d->fxState[0].size = sizeof(UInt);
4857
4858 d->fxState[1].fx = Ifx_Write;
4859 d->fxState[1].offset = OFFB_FPREGS;
4860 d->fxState[1].size = 8 * sizeof(ULong);
4861
4862 d->fxState[2].fx = Ifx_Write;
4863 d->fxState[2].offset = OFFB_FPTAGS;
4864 d->fxState[2].size = 8 * sizeof(UChar);
4865
4866 d->fxState[3].fx = Ifx_Write;
4867 d->fxState[3].offset = OFFB_FPROUND;
4868 d->fxState[3].size = sizeof(UInt);
4869
4870 d->fxState[4].fx = Ifx_Write;
4871 d->fxState[4].offset = OFFB_FC3210;
4872 d->fxState[4].size = sizeof(UInt);
4873
4874 stmt( IRStmt_Dirty(d) );
4875
4876 DIP("fninit\n");
4877 break;
4878 }
4879
4880 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
4881 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
4882 break;
4883
4884 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
4885 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
4886 break;
4887
4888 default:
4889 goto decode_fail;
4890 }
4891 }
4892 }
4893
4894 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
4895 else
4896 if (first_opcode == 0xDC) {
4897 if (modrm < 0xC0) {
4898
4899 /* bits 5,4,3 are an opcode extension, and the modRM also
4900 specifies an address. */
4901 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4902 delta += len;
4903
4904 switch (gregOfRM(modrm)) {
4905
4906 case 0: /* FADD double-real */
4907 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
4908 break;
4909
4910 case 1: /* FMUL double-real */
4911 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
4912 break;
4913
4914 case 2: /* FCOM double-real */
4915 DIP("fcoml %s\n", dis_buf);
4916 /* This forces C1 to zero, which isn't right. */
4917 put_C3210(
4918 binop( Iop_And32,
4919 binop(Iop_Shl32,
4920 binop(Iop_CmpF64,
4921 get_ST(0),
4922 loadLE(Ity_F64,mkexpr(addr))),
4923 mkU8(8)),
4924 mkU32(0x4500)
4925 ));
4926 break;
4927
4928 case 3: /* FCOMP double-real */
4929 DIP("fcompl %s\n", dis_buf);
4930 /* This forces C1 to zero, which isn't right. */
4931 put_C3210(
4932 binop( Iop_And32,
4933 binop(Iop_Shl32,
4934 binop(Iop_CmpF64,
4935 get_ST(0),
4936 loadLE(Ity_F64,mkexpr(addr))),
4937 mkU8(8)),
4938 mkU32(0x4500)
4939 ));
4940 fp_pop();
4941 break;
4942
4943 case 4: /* FSUB double-real */
4944 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
4945 break;
4946
4947 case 5: /* FSUBR double-real */
4948 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
4949 break;
4950
4951 case 6: /* FDIV double-real */
4952 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
4953 break;
4954
4955 case 7: /* FDIVR double-real */
4956 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
4957 break;
4958
4959 default:
4960 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
4961 vex_printf("first_opcode == 0xDC\n");
4962 goto decode_fail;
4963 }
4964
4965 } else {
4966
4967 delta++;
4968 switch (modrm) {
4969
4970 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
4971 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
4972 break;
4973
4974 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
4975 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
4976 break;
4977
4978 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
4979 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
4980 break;
4981
4982 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
4983 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
4984 break;
4985
4986 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
4987 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
4988 break;
4989
4990 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
4991 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
4992 break;
4993
4994 default:
4995 goto decode_fail;
4996 }
4997
4998 }
4999 }
5000
5001 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
5002 else
5003 if (first_opcode == 0xDD) {
5004
5005 if (modrm < 0xC0) {
5006
5007 /* bits 5,4,3 are an opcode extension, and the modRM also
5008 specifies an address. */
5009 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5010 delta += len;
5011
5012 switch (gregOfRM(modrm)) {
5013
5014 case 0: /* FLD double-real */
5015 DIP("fldl %s\n", dis_buf);
5016 fp_push();
5017 put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
5018 break;
5019
5020 case 1: /* FISTTPQ m64 (SSE3) */
5021 DIP("fistppll %s\n", dis_buf);
5022 storeLE( mkexpr(addr),
5023 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
5024 fp_pop();
5025 break;
5026
5027 case 2: /* FST double-real */
5028 DIP("fstl %s\n", dis_buf);
5029 storeLE(mkexpr(addr), get_ST(0));
5030 break;
5031
5032 case 3: /* FSTP double-real */
5033 DIP("fstpl %s\n", dis_buf);
5034 storeLE(mkexpr(addr), get_ST(0));
5035 fp_pop();
5036 break;
5037
5038 case 4: { /* FRSTOR m108 */
/* Uses dirty helper:
   VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State*, Addr32 ) */
5041 IRTemp ew = newTemp(Ity_I32);
5042 IRDirty* d = unsafeIRDirty_0_N (
5043 0/*regparms*/,
5044 "x86g_dirtyhelper_FRSTOR",
5045 &x86g_dirtyhelper_FRSTOR,
5046 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
5047 );
5048 d->tmp = ew;
5049 /* declare we're reading memory */
5050 d->mFx = Ifx_Read;
5051 d->mAddr = mkexpr(addr);
5052 d->mSize = 108;
5053
5054 /* declare we're writing guest state */
5055 d->nFxState = 5;
5056 vex_bzero(&d->fxState, sizeof(d->fxState));
5057
5058 d->fxState[0].fx = Ifx_Write;
5059 d->fxState[0].offset = OFFB_FTOP;
5060 d->fxState[0].size = sizeof(UInt);
5061
5062 d->fxState[1].fx = Ifx_Write;
5063 d->fxState[1].offset = OFFB_FPREGS;
5064 d->fxState[1].size = 8 * sizeof(ULong);
5065
5066 d->fxState[2].fx = Ifx_Write;
5067 d->fxState[2].offset = OFFB_FPTAGS;
5068 d->fxState[2].size = 8 * sizeof(UChar);
5069
5070 d->fxState[3].fx = Ifx_Write;
5071 d->fxState[3].offset = OFFB_FPROUND;
5072 d->fxState[3].size = sizeof(UInt);
5073
5074 d->fxState[4].fx = Ifx_Write;
5075 d->fxState[4].offset = OFFB_FC3210;
5076 d->fxState[4].size = sizeof(UInt);
5077
5078 stmt( IRStmt_Dirty(d) );
5079
5080 /* ew contains any emulation warning we may need to
5081 issue. If needed, side-exit to the next insn,
5082 reporting the warning, so that Valgrind's dispatcher
5083 sees the warning. */
5084 put_emwarn( mkexpr(ew) );
5085 stmt(
5086 IRStmt_Exit(
5087 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5088 Ijk_EmWarn,
5089 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
5090 OFFB_EIP
5091 )
5092 );
5093
5094 DIP("frstor %s\n", dis_buf);
5095 break;
5096 }
5097
5098 case 6: { /* FNSAVE m108 */
/* Uses dirty helper:
   void x86g_dirtyhelper_FSAVE ( VexGuestX86State*, UInt ) */
5101 IRDirty* d = unsafeIRDirty_0_N (
5102 0/*regparms*/,
5103 "x86g_dirtyhelper_FSAVE",
5104 &x86g_dirtyhelper_FSAVE,
5105 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
5106 );
5107 /* declare we're writing memory */
5108 d->mFx = Ifx_Write;
5109 d->mAddr = mkexpr(addr);
5110 d->mSize = 108;
5111
5112 /* declare we're reading guest state */
5113 d->nFxState = 5;
5114 vex_bzero(&d->fxState, sizeof(d->fxState));
5115
5116 d->fxState[0].fx = Ifx_Read;
5117 d->fxState[0].offset = OFFB_FTOP;
5118 d->fxState[0].size = sizeof(UInt);
5119
5120 d->fxState[1].fx = Ifx_Read;
5121 d->fxState[1].offset = OFFB_FPREGS;
5122 d->fxState[1].size = 8 * sizeof(ULong);
5123
5124 d->fxState[2].fx = Ifx_Read;
5125 d->fxState[2].offset = OFFB_FPTAGS;
5126 d->fxState[2].size = 8 * sizeof(UChar);
5127
5128 d->fxState[3].fx = Ifx_Read;
5129 d->fxState[3].offset = OFFB_FPROUND;
5130 d->fxState[3].size = sizeof(UInt);
5131
5132 d->fxState[4].fx = Ifx_Read;
5133 d->fxState[4].offset = OFFB_FC3210;
5134 d->fxState[4].size = sizeof(UInt);
5135
5136 stmt( IRStmt_Dirty(d) );
5137
5138 DIP("fnsave %s\n", dis_buf);
5139 break;
5140 }
5141
5142 case 7: { /* FNSTSW m16 */
5143 IRExpr* sw = get_FPU_sw();
5144 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
5145 storeLE( mkexpr(addr), sw );
5146 DIP("fnstsw %s\n", dis_buf);
5147 break;
5148 }
5149
5150 default:
5151 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
5152 vex_printf("first_opcode == 0xDD\n");
5153 goto decode_fail;
5154 }
5155 } else {
5156 delta++;
5157 switch (modrm) {
5158
5159 case 0xC0 ... 0xC7: /* FFREE %st(?) */
5160 r_dst = (UInt)modrm - 0xC0;
5161 DIP("ffree %%st(%u)\n", r_dst);
5162 put_ST_TAG ( r_dst, mkU8(0) );
5163 break;
5164
5165 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
5166 r_dst = (UInt)modrm - 0xD0;
5167 DIP("fst %%st(0),%%st(%u)\n", r_dst);
5168 /* P4 manual says: "If the destination operand is a
5169 non-empty register, the invalid-operation exception
5170 is not generated. Hence put_ST_UNCHECKED. */
5171 put_ST_UNCHECKED(r_dst, get_ST(0));
5172 break;
5173
5174 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
5175 r_dst = (UInt)modrm - 0xD8;
5176 DIP("fstp %%st(0),%%st(%u)\n", r_dst);
5177 /* P4 manual says: "If the destination operand is a
5178 non-empty register, the invalid-operation exception
5179 is not generated. Hence put_ST_UNCHECKED. */
5180 put_ST_UNCHECKED(r_dst, get_ST(0));
5181 fp_pop();
5182 break;
5183
5184 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
5185 r_dst = (UInt)modrm - 0xE0;
5186 DIP("fucom %%st(0),%%st(%u)\n", r_dst);
5187 /* This forces C1 to zero, which isn't right. */
5188 put_C3210(
5189 binop( Iop_And32,
5190 binop(Iop_Shl32,
5191 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5192 mkU8(8)),
5193 mkU32(0x4500)
5194 ));
5195 break;
5196
5197 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
5198 r_dst = (UInt)modrm - 0xE8;
5199 DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
5200 /* This forces C1 to zero, which isn't right. */
5201 put_C3210(
5202 binop( Iop_And32,
5203 binop(Iop_Shl32,
5204 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5205 mkU8(8)),
5206 mkU32(0x4500)
5207 ));
5208 fp_pop();
5209 break;
5210
5211 default:
5212 goto decode_fail;
5213 }
5214 }
5215 }
5216
5217 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
5218 else
5219 if (first_opcode == 0xDE) {
5220
5221 if (modrm < 0xC0) {
5222
5223 /* bits 5,4,3 are an opcode extension, and the modRM also
5224 specifies an address. */
5225 IROp fop;
5226 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5227 delta += len;
5228
5229 switch (gregOfRM(modrm)) {
5230
5231 case 0: /* FIADD m16int */ /* ST(0) += m16int */
5232 DIP("fiaddw %s\n", dis_buf);
5233 fop = Iop_AddF64;
5234 goto do_fop_m16;
5235
5236 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
5237 DIP("fimulw %s\n", dis_buf);
5238 fop = Iop_MulF64;
5239 goto do_fop_m16;
5240
5241 case 2: /* FICOM m16int */
5242 DIP("ficomw %s\n", dis_buf);
5243 /* This forces C1 to zero, which isn't right. */
5244 put_C3210(
5245 binop( Iop_And32,
5246 binop(Iop_Shl32,
5247 binop(Iop_CmpF64,
5248 get_ST(0),
5249 unop(Iop_I32StoF64,
5250 unop(Iop_16Sto32,
5251 loadLE(Ity_I16,mkexpr(addr))))),
5252 mkU8(8)),
5253 mkU32(0x4500)
5254 ));
5255 break;
5256
5257 case 3: /* FICOMP m16int */
5258 DIP("ficompw %s\n", dis_buf);
5259 /* This forces C1 to zero, which isn't right. */
5260 put_C3210(
5261 binop( Iop_And32,
5262 binop(Iop_Shl32,
5263 binop(Iop_CmpF64,
5264 get_ST(0),
5265 unop(Iop_I32StoF64,
5266 unop(Iop_16Sto32,
5267 loadLE(Ity_I16,mkexpr(addr))))),
5268 mkU8(8)),
5269 mkU32(0x4500)
5270 ));
5271 fp_pop();
5272 break;
5273
5274 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
5275 DIP("fisubw %s\n", dis_buf);
5276 fop = Iop_SubF64;
5277 goto do_fop_m16;
5278
5279 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
5280 DIP("fisubrw %s\n", dis_buf);
5281 fop = Iop_SubF64;
5282 goto do_foprev_m16;
5283
5284 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
5285 DIP("fisubw %s\n", dis_buf);
5286 fop = Iop_DivF64;
5287 goto do_fop_m16;
5288
5289 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
5290 DIP("fidivrw %s\n", dis_buf);
5291 fop = Iop_DivF64;
5292 goto do_foprev_m16;
5293
5294 do_fop_m16:
5295 put_ST_UNCHECKED(0,
5296 triop(fop,
5297 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5298 get_ST(0),
5299 unop(Iop_I32StoF64,
5300 unop(Iop_16Sto32,
5301 loadLE(Ity_I16, mkexpr(addr))))));
5302 break;
5303
5304 do_foprev_m16:
5305 put_ST_UNCHECKED(0,
5306 triop(fop,
5307 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5308 unop(Iop_I32StoF64,
5309 unop(Iop_16Sto32,
5310 loadLE(Ity_I16, mkexpr(addr)))),
5311 get_ST(0)));
5312 break;
5313
5314 default:
5315 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
5316 vex_printf("first_opcode == 0xDE\n");
5317 goto decode_fail;
5318 }
5319
5320 } else {
5321
5322 delta++;
5323 switch (modrm) {
5324
5325 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
5326 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
5327 break;
5328
5329 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
5330 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
5331 break;
5332
5333 case 0xD9: /* FCOMPP %st(0),%st(1) */
5334 DIP("fuompp %%st(0),%%st(1)\n");
5335 /* This forces C1 to zero, which isn't right. */
5336 put_C3210(
5337 binop( Iop_And32,
5338 binop(Iop_Shl32,
5339 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
5340 mkU8(8)),
5341 mkU32(0x4500)
5342 ));
5343 fp_pop();
5344 fp_pop();
5345 break;
5346
5347 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
5348 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
5349 break;
5350
5351 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
5352 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
5353 break;
5354
5355 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
5356 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
5357 break;
5358
5359 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
5360 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
5361 break;
5362
5363 default:
5364 goto decode_fail;
5365 }
5366
5367 }
5368 }
5369
5370 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
5371 else
5372 if (first_opcode == 0xDF) {
5373
5374 if (modrm < 0xC0) {
5375
5376 /* bits 5,4,3 are an opcode extension, and the modRM also
5377 specifies an address. */
5378 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5379 delta += len;
5380
5381 switch (gregOfRM(modrm)) {
5382
5383 case 0: /* FILD m16int */
5384 DIP("fildw %s\n", dis_buf);
5385 fp_push();
5386 put_ST(0, unop(Iop_I32StoF64,
5387 unop(Iop_16Sto32,
5388 loadLE(Ity_I16, mkexpr(addr)))));
5389 break;
5390
5391 case 1: /* FISTTPS m16 (SSE3) */
5392 DIP("fisttps %s\n", dis_buf);
5393 storeLE( mkexpr(addr),
5394 binop(Iop_F64toI16S, mkU32(Irrm_ZERO), get_ST(0)) );
5395 fp_pop();
5396 break;
5397
5398 case 2: /* FIST m16 */
5399 DIP("fistp %s\n", dis_buf);
5400 storeLE( mkexpr(addr),
5401 binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );
5402 break;
5403
5404 case 3: /* FISTP m16 */
5405 DIP("fistps %s\n", dis_buf);
5406 storeLE( mkexpr(addr),
5407 binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );
5408 fp_pop();
5409 break;
5410
5411 case 5: /* FILD m64 */
5412 DIP("fildll %s\n", dis_buf);
5413 fp_push();
5414 put_ST(0, binop(Iop_I64StoF64,
5415 get_roundingmode(),
5416 loadLE(Ity_I64, mkexpr(addr))));
5417 break;
5418
5419 case 7: /* FISTP m64 */
5420 DIP("fistpll %s\n", dis_buf);
5421 storeLE( mkexpr(addr),
5422 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
5423 fp_pop();
5424 break;
5425
5426 default:
5427 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt)gregOfRM(modrm));
5428 vex_printf("first_opcode == 0xDF\n");
5429 goto decode_fail;
5430 }
5431
5432 } else {
5433
5434 delta++;
5435 switch (modrm) {
5436
5437 case 0xC0: /* FFREEP %st(0) */
5438 DIP("ffreep %%st(%d)\n", 0);
5439 put_ST_TAG ( 0, mkU8(0) );
5440 fp_pop();
5441 break;
5442
5443 case 0xE0: /* FNSTSW %ax */
5444 DIP("fnstsw %%ax\n");
5445 /* Get the FPU status word value and dump it in %AX. */
5446 if (0) {
5447 /* The obvious thing to do is simply dump the 16-bit
5448 status word value in %AX. However, due to a
5449 limitation in Memcheck's origin tracking
5450 machinery, this causes Memcheck not to track the
5451 origin of any undefinedness into %AH (only into
5452 %AL/%AX/%EAX), which means origins are lost in
5453 the sequence "fnstsw %ax; test $M,%ah; jcond .." */
5454 putIReg(2, R_EAX, get_FPU_sw());
5455 } else {
5456 /* So a somewhat lame kludge is to make it very
5457 clear to Memcheck that the value is written to
5458 both %AH and %AL. This generates marginally
5459 worse code, but I don't think it matters much. */
5460 IRTemp t16 = newTemp(Ity_I16);
5461 assign(t16, get_FPU_sw());
5462 putIReg( 1, R_AL, unop(Iop_16to8, mkexpr(t16)) );
5463 putIReg( 1, R_AH, unop(Iop_16HIto8, mkexpr(t16)) );
5464 }
5465 break;
5466
5467 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
5468 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
5469 break;
5470
5471 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
5472 /* not really right since COMIP != UCOMIP */
5473 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
5474 break;
5475
5476 default:
5477 goto decode_fail;
5478 }
5479 }
5480
5481 }
5482
5483 else
5484 vpanic("dis_FPU(x86): invalid primary opcode");
5485
5486 *decode_ok = True;
5487 return delta;
5488
5489 decode_fail:
5490 *decode_ok = False;
5491 return delta;
5492 }
5493
5494
5495 /*------------------------------------------------------------*/
5496 /*--- ---*/
5497 /*--- MMX INSTRUCTIONS ---*/
5498 /*--- ---*/
5499 /*------------------------------------------------------------*/
5500
5501 /* Effect of MMX insns on x87 FPU state (table 11-2 of
5502 IA32 arch manual, volume 3):
5503
5504 Read from, or write to MMX register (viz, any insn except EMMS):
5505 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
5506 * FP stack pointer set to zero
5507
5508 EMMS:
5509 * All tags set to Invalid (empty) -- FPTAGS[i] := zero
5510 * FP stack pointer set to zero
5511 */
5512
do_MMX_preamble(void)5513 static void do_MMX_preamble ( void )
5514 {
5515 Int i;
5516 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5517 IRExpr* zero = mkU32(0);
5518 IRExpr* tag1 = mkU8(1);
5519 put_ftop(zero);
5520 for (i = 0; i < 8; i++)
5521 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
5522 }
5523
do_EMMS_preamble(void)5524 static void do_EMMS_preamble ( void )
5525 {
5526 Int i;
5527 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5528 IRExpr* zero = mkU32(0);
5529 IRExpr* tag0 = mkU8(0);
5530 put_ftop(zero);
5531 for (i = 0; i < 8; i++)
5532 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
5533 }
5534
5535
getMMXReg(UInt archreg)5536 static IRExpr* getMMXReg ( UInt archreg )
5537 {
5538 vassert(archreg < 8);
5539 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
5540 }
5541
5542
/* Emit a Put of |e| to MMX register |archreg| (0 .. 7), i.e. to guest
   state at OFFB_FPREGS + 8 * archreg.  |e| must have type Ity_I64;
   both conditions are asserted. */
static void putMMXReg ( UInt archreg, IRExpr* e )
{
   IRType tyE;
   vassert(archreg < 8);
   tyE = typeOfIRExpr(irsb->tyenv, e);
   vassert(tyE == Ity_I64);
   stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
}
5549
5550
5551 /* Helper for non-shift MMX insns. Note this is incomplete in the
5552 sense that it does not first call do_MMX_preamble() -- that is the
5553 responsibility of its caller. */
5554
5555 static
dis_MMXop_regmem_to_reg(UChar sorb,Int delta,UChar opc,const HChar * name,Bool show_granularity)5556 UInt dis_MMXop_regmem_to_reg ( UChar sorb,
5557 Int delta,
5558 UChar opc,
5559 const HChar* name,
5560 Bool show_granularity )
5561 {
5562 HChar dis_buf[50];
5563 UChar modrm = getIByte(delta);
5564 Bool isReg = epartIsReg(modrm);
5565 IRExpr* argL = NULL;
5566 IRExpr* argR = NULL;
5567 IRExpr* argG = NULL;
5568 IRExpr* argE = NULL;
5569 IRTemp res = newTemp(Ity_I64);
5570
5571 Bool invG = False;
5572 IROp op = Iop_INVALID;
5573 void* hAddr = NULL;
5574 Bool eLeft = False;
5575 const HChar* hName = NULL;
5576
5577 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
5578
5579 switch (opc) {
5580 /* Original MMX ones */
5581 case 0xFC: op = Iop_Add8x8; break;
5582 case 0xFD: op = Iop_Add16x4; break;
5583 case 0xFE: op = Iop_Add32x2; break;
5584
5585 case 0xEC: op = Iop_QAdd8Sx8; break;
5586 case 0xED: op = Iop_QAdd16Sx4; break;
5587
5588 case 0xDC: op = Iop_QAdd8Ux8; break;
5589 case 0xDD: op = Iop_QAdd16Ux4; break;
5590
5591 case 0xF8: op = Iop_Sub8x8; break;
5592 case 0xF9: op = Iop_Sub16x4; break;
5593 case 0xFA: op = Iop_Sub32x2; break;
5594
5595 case 0xE8: op = Iop_QSub8Sx8; break;
5596 case 0xE9: op = Iop_QSub16Sx4; break;
5597
5598 case 0xD8: op = Iop_QSub8Ux8; break;
5599 case 0xD9: op = Iop_QSub16Ux4; break;
5600
5601 case 0xE5: op = Iop_MulHi16Sx4; break;
5602 case 0xD5: op = Iop_Mul16x4; break;
5603 case 0xF5: XXX(x86g_calculate_mmx_pmaddwd); break;
5604
5605 case 0x74: op = Iop_CmpEQ8x8; break;
5606 case 0x75: op = Iop_CmpEQ16x4; break;
5607 case 0x76: op = Iop_CmpEQ32x2; break;
5608
5609 case 0x64: op = Iop_CmpGT8Sx8; break;
5610 case 0x65: op = Iop_CmpGT16Sx4; break;
5611 case 0x66: op = Iop_CmpGT32Sx2; break;
5612
5613 case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
5614 case 0x63: op = Iop_QNarrowBin16Sto8Sx8; eLeft = True; break;
5615 case 0x67: op = Iop_QNarrowBin16Sto8Ux8; eLeft = True; break;
5616
5617 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break;
5618 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
5619 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;
5620
5621 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break;
5622 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
5623 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;
5624
5625 case 0xDB: op = Iop_And64; break;
5626 case 0xDF: op = Iop_And64; invG = True; break;
5627 case 0xEB: op = Iop_Or64; break;
5628 case 0xEF: /* Possibly do better here if argL and argR are the
5629 same reg */
5630 op = Iop_Xor64; break;
5631
5632 /* Introduced in SSE1 */
5633 case 0xE0: op = Iop_Avg8Ux8; break;
5634 case 0xE3: op = Iop_Avg16Ux4; break;
5635 case 0xEE: op = Iop_Max16Sx4; break;
5636 case 0xDE: op = Iop_Max8Ux8; break;
5637 case 0xEA: op = Iop_Min16Sx4; break;
5638 case 0xDA: op = Iop_Min8Ux8; break;
5639 case 0xE4: op = Iop_MulHi16Ux4; break;
5640 case 0xF6: XXX(x86g_calculate_mmx_psadbw); break;
5641
5642 /* Introduced in SSE2 */
5643 case 0xD4: op = Iop_Add64; break;
5644 case 0xFB: op = Iop_Sub64; break;
5645
5646 default:
5647 vex_printf("\n0x%x\n", opc);
5648 vpanic("dis_MMXop_regmem_to_reg");
5649 }
5650
5651 # undef XXX
5652
5653 argG = getMMXReg(gregOfRM(modrm));
5654 if (invG)
5655 argG = unop(Iop_Not64, argG);
5656
5657 if (isReg) {
5658 delta++;
5659 argE = getMMXReg(eregOfRM(modrm));
5660 } else {
5661 Int len;
5662 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5663 delta += len;
5664 argE = loadLE(Ity_I64, mkexpr(addr));
5665 }
5666
5667 if (eLeft) {
5668 argL = argE;
5669 argR = argG;
5670 } else {
5671 argL = argG;
5672 argR = argE;
5673 }
5674
5675 if (op != Iop_INVALID) {
5676 vassert(hName == NULL);
5677 vassert(hAddr == NULL);
5678 assign(res, binop(op, argL, argR));
5679 } else {
5680 vassert(hName != NULL);
5681 vassert(hAddr != NULL);
5682 assign( res,
5683 mkIRExprCCall(
5684 Ity_I64,
5685 0/*regparms*/, hName, hAddr,
5686 mkIRExprVec_2( argL, argR )
5687 )
5688 );
5689 }
5690
5691 putMMXReg( gregOfRM(modrm), mkexpr(res) );
5692
5693 DIP("%s%s %s, %s\n",
5694 name, show_granularity ? nameMMXGran(opc & 3) : "",
5695 ( isReg ? nameMMXReg(eregOfRM(modrm)) : dis_buf ),
5696 nameMMXReg(gregOfRM(modrm)) );
5697
5698 return delta;
5699 }
5700
5701
5702 /* Vector by scalar shift of G by the amount specified at the bottom
5703 of E. This is a straight copy of dis_SSE_shiftG_byE. */
5704
dis_MMX_shiftG_byE(UChar sorb,Int delta,const HChar * opname,IROp op)5705 static UInt dis_MMX_shiftG_byE ( UChar sorb, Int delta,
5706 const HChar* opname, IROp op )
5707 {
5708 HChar dis_buf[50];
5709 Int alen, size;
5710 IRTemp addr;
5711 Bool shl, shr, sar;
5712 UChar rm = getIByte(delta);
5713 IRTemp g0 = newTemp(Ity_I64);
5714 IRTemp g1 = newTemp(Ity_I64);
5715 IRTemp amt = newTemp(Ity_I32);
5716 IRTemp amt8 = newTemp(Ity_I8);
5717
5718 if (epartIsReg(rm)) {
5719 assign( amt, unop(Iop_64to32, getMMXReg(eregOfRM(rm))) );
5720 DIP("%s %s,%s\n", opname,
5721 nameMMXReg(eregOfRM(rm)),
5722 nameMMXReg(gregOfRM(rm)) );
5723 delta++;
5724 } else {
5725 addr = disAMode ( &alen, sorb, delta, dis_buf );
5726 assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
5727 DIP("%s %s,%s\n", opname,
5728 dis_buf,
5729 nameMMXReg(gregOfRM(rm)) );
5730 delta += alen;
5731 }
5732 assign( g0, getMMXReg(gregOfRM(rm)) );
5733 assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
5734
5735 shl = shr = sar = False;
5736 size = 0;
5737 switch (op) {
5738 case Iop_ShlN16x4: shl = True; size = 32; break;
5739 case Iop_ShlN32x2: shl = True; size = 32; break;
5740 case Iop_Shl64: shl = True; size = 64; break;
5741 case Iop_ShrN16x4: shr = True; size = 16; break;
5742 case Iop_ShrN32x2: shr = True; size = 32; break;
5743 case Iop_Shr64: shr = True; size = 64; break;
5744 case Iop_SarN16x4: sar = True; size = 16; break;
5745 case Iop_SarN32x2: sar = True; size = 32; break;
5746 default: vassert(0);
5747 }
5748
5749 if (shl || shr) {
5750 assign(
5751 g1,
5752 IRExpr_ITE(
5753 binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
5754 binop(op, mkexpr(g0), mkexpr(amt8)),
5755 mkU64(0)
5756 )
5757 );
5758 } else
5759 if (sar) {
5760 assign(
5761 g1,
5762 IRExpr_ITE(
5763 binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
5764 binop(op, mkexpr(g0), mkexpr(amt8)),
5765 binop(op, mkexpr(g0), mkU8(size-1))
5766 )
5767 );
5768 } else {
5769 /*NOTREACHED*/
5770 vassert(0);
5771 }
5772
5773 putMMXReg( gregOfRM(rm), mkexpr(g1) );
5774 return delta;
5775 }
5776
5777
5778 /* Vector by scalar shift of E by an immediate byte. This is a
5779 straight copy of dis_SSE_shiftE_imm. */
5780
5781 static
dis_MMX_shiftE_imm(Int delta,const HChar * opname,IROp op)5782 UInt dis_MMX_shiftE_imm ( Int delta, const HChar* opname, IROp op )
5783 {
5784 Bool shl, shr, sar;
5785 UChar rm = getIByte(delta);
5786 IRTemp e0 = newTemp(Ity_I64);
5787 IRTemp e1 = newTemp(Ity_I64);
5788 UChar amt, size;
5789 vassert(epartIsReg(rm));
5790 vassert(gregOfRM(rm) == 2
5791 || gregOfRM(rm) == 4 || gregOfRM(rm) == 6);
5792 amt = getIByte(delta+1);
5793 delta += 2;
5794 DIP("%s $%d,%s\n", opname,
5795 (Int)amt,
5796 nameMMXReg(eregOfRM(rm)) );
5797
5798 assign( e0, getMMXReg(eregOfRM(rm)) );
5799
5800 shl = shr = sar = False;
5801 size = 0;
5802 switch (op) {
5803 case Iop_ShlN16x4: shl = True; size = 16; break;
5804 case Iop_ShlN32x2: shl = True; size = 32; break;
5805 case Iop_Shl64: shl = True; size = 64; break;
5806 case Iop_SarN16x4: sar = True; size = 16; break;
5807 case Iop_SarN32x2: sar = True; size = 32; break;
5808 case Iop_ShrN16x4: shr = True; size = 16; break;
5809 case Iop_ShrN32x2: shr = True; size = 32; break;
5810 case Iop_Shr64: shr = True; size = 64; break;
5811 default: vassert(0);
5812 }
5813
5814 if (shl || shr) {
5815 assign( e1, amt >= size
5816 ? mkU64(0)
5817 : binop(op, mkexpr(e0), mkU8(amt))
5818 );
5819 } else
5820 if (sar) {
5821 assign( e1, amt >= size
5822 ? binop(op, mkexpr(e0), mkU8(size-1))
5823 : binop(op, mkexpr(e0), mkU8(amt))
5824 );
5825 } else {
5826 /*NOTREACHED*/
5827 vassert(0);
5828 }
5829
5830 putMMXReg( eregOfRM(rm), mkexpr(e1) );
5831 return delta;
5832 }
5833
5834
5835 /* Completely handle all MMX instructions except emms. */
5836
5837 static
dis_MMX(Bool * decode_ok,UChar sorb,Int sz,Int delta)5838 UInt dis_MMX ( Bool* decode_ok, UChar sorb, Int sz, Int delta )
5839 {
5840 Int len;
5841 UChar modrm;
5842 HChar dis_buf[50];
5843 UChar opc = getIByte(delta);
5844 delta++;
5845
5846 /* dis_MMX handles all insns except emms. */
5847 do_MMX_preamble();
5848
5849 switch (opc) {
5850
5851 case 0x6E:
5852 /* MOVD (src)ireg-or-mem (E), (dst)mmxreg (G)*/
5853 if (sz != 4)
5854 goto mmx_decode_failure;
5855 modrm = getIByte(delta);
5856 if (epartIsReg(modrm)) {
5857 delta++;
5858 putMMXReg(
5859 gregOfRM(modrm),
5860 binop( Iop_32HLto64,
5861 mkU32(0),
5862 getIReg(4, eregOfRM(modrm)) ) );
5863 DIP("movd %s, %s\n",
5864 nameIReg(4,eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm)));
5865 } else {
5866 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5867 delta += len;
5868 putMMXReg(
5869 gregOfRM(modrm),
5870 binop( Iop_32HLto64,
5871 mkU32(0),
5872 loadLE(Ity_I32, mkexpr(addr)) ) );
5873 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregOfRM(modrm)));
5874 }
5875 break;
5876
5877 case 0x7E: /* MOVD (src)mmxreg (G), (dst)ireg-or-mem (E) */
5878 if (sz != 4)
5879 goto mmx_decode_failure;
5880 modrm = getIByte(delta);
5881 if (epartIsReg(modrm)) {
5882 delta++;
5883 putIReg( 4, eregOfRM(modrm),
5884 unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) );
5885 DIP("movd %s, %s\n",
5886 nameMMXReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm)));
5887 } else {
5888 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5889 delta += len;
5890 storeLE( mkexpr(addr),
5891 unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) );
5892 DIP("movd %s, %s\n", nameMMXReg(gregOfRM(modrm)), dis_buf);
5893 }
5894 break;
5895
5896 case 0x6F:
5897 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
5898 if (sz != 4)
5899 goto mmx_decode_failure;
5900 modrm = getIByte(delta);
5901 if (epartIsReg(modrm)) {
5902 delta++;
5903 putMMXReg( gregOfRM(modrm), getMMXReg(eregOfRM(modrm)) );
5904 DIP("movq %s, %s\n",
5905 nameMMXReg(eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm)));
5906 } else {
5907 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5908 delta += len;
5909 putMMXReg( gregOfRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
5910 DIP("movq %s, %s\n",
5911 dis_buf, nameMMXReg(gregOfRM(modrm)));
5912 }
5913 break;
5914
5915 case 0x7F:
5916 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
5917 if (sz != 4)
5918 goto mmx_decode_failure;
5919 modrm = getIByte(delta);
5920 if (epartIsReg(modrm)) {
5921 delta++;
5922 putMMXReg( eregOfRM(modrm), getMMXReg(gregOfRM(modrm)) );
5923 DIP("movq %s, %s\n",
5924 nameMMXReg(gregOfRM(modrm)), nameMMXReg(eregOfRM(modrm)));
5925 } else {
5926 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5927 delta += len;
5928 storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
5929 DIP("mov(nt)q %s, %s\n",
5930 nameMMXReg(gregOfRM(modrm)), dis_buf);
5931 }
5932 break;
5933
5934 case 0xFC:
5935 case 0xFD:
5936 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
5937 if (sz != 4)
5938 goto mmx_decode_failure;
5939 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padd", True );
5940 break;
5941
5942 case 0xEC:
5943 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
5944 if (sz != 4)
5945 goto mmx_decode_failure;
5946 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padds", True );
5947 break;
5948
5949 case 0xDC:
5950 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
5951 if (sz != 4)
5952 goto mmx_decode_failure;
5953 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "paddus", True );
5954 break;
5955
5956 case 0xF8:
5957 case 0xF9:
5958 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
5959 if (sz != 4)
5960 goto mmx_decode_failure;
5961 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psub", True );
5962 break;
5963
5964 case 0xE8:
5965 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
5966 if (sz != 4)
5967 goto mmx_decode_failure;
5968 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubs", True );
5969 break;
5970
5971 case 0xD8:
5972 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
5973 if (sz != 4)
5974 goto mmx_decode_failure;
5975 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubus", True );
5976 break;
5977
5978 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
5979 if (sz != 4)
5980 goto mmx_decode_failure;
5981 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmulhw", False );
5982 break;
5983
5984 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
5985 if (sz != 4)
5986 goto mmx_decode_failure;
5987 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmullw", False );
5988 break;
5989
5990 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
5991 vassert(sz == 4);
5992 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmaddwd", False );
5993 break;
5994
5995 case 0x74:
5996 case 0x75:
5997 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
5998 if (sz != 4)
5999 goto mmx_decode_failure;
6000 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpeq", True );
6001 break;
6002
6003 case 0x64:
6004 case 0x65:
6005 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
6006 if (sz != 4)
6007 goto mmx_decode_failure;
6008 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpgt", True );
6009 break;
6010
6011 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
6012 if (sz != 4)
6013 goto mmx_decode_failure;
6014 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packssdw", False );
6015 break;
6016
6017 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
6018 if (sz != 4)
6019 goto mmx_decode_failure;
6020 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packsswb", False );
6021 break;
6022
6023 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
6024 if (sz != 4)
6025 goto mmx_decode_failure;
6026 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packuswb", False );
6027 break;
6028
6029 case 0x68:
6030 case 0x69:
6031 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
6032 if (sz != 4)
6033 goto mmx_decode_failure;
6034 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckh", True );
6035 break;
6036
6037 case 0x60:
6038 case 0x61:
6039 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
6040 if (sz != 4)
6041 goto mmx_decode_failure;
6042 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckl", True );
6043 break;
6044
6045 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
6046 if (sz != 4)
6047 goto mmx_decode_failure;
6048 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pand", False );
6049 break;
6050
6051 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
6052 if (sz != 4)
6053 goto mmx_decode_failure;
6054 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pandn", False );
6055 break;
6056
6057 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
6058 if (sz != 4)
6059 goto mmx_decode_failure;
6060 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "por", False );
6061 break;
6062
6063 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
6064 if (sz != 4)
6065 goto mmx_decode_failure;
6066 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pxor", False );
6067 break;
6068
6069 # define SHIFT_BY_REG(_name,_op) \
6070 delta = dis_MMX_shiftG_byE(sorb, delta, _name, _op); \
6071 break;
6072
6073 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
6074 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
6075 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
6076 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);
6077
6078 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
6079 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
6080 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
6081 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);
6082
6083 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
6084 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
6085 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);
6086
6087 # undef SHIFT_BY_REG
6088
6089 case 0x71:
6090 case 0x72:
6091 case 0x73: {
6092 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
6093 UChar byte2, subopc;
6094 if (sz != 4)
6095 goto mmx_decode_failure;
6096 byte2 = getIByte(delta); /* amode / sub-opcode */
6097 subopc = toUChar( (byte2 >> 3) & 7 );
6098
6099 # define SHIFT_BY_IMM(_name,_op) \
6100 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
6101 } while (0)
6102
6103 if (subopc == 2 /*SRL*/ && opc == 0x71)
6104 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
6105 else if (subopc == 2 /*SRL*/ && opc == 0x72)
6106 SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
6107 else if (subopc == 2 /*SRL*/ && opc == 0x73)
6108 SHIFT_BY_IMM("psrlq", Iop_Shr64);
6109
6110 else if (subopc == 4 /*SAR*/ && opc == 0x71)
6111 SHIFT_BY_IMM("psraw", Iop_SarN16x4);
6112 else if (subopc == 4 /*SAR*/ && opc == 0x72)
6113 SHIFT_BY_IMM("psrad", Iop_SarN32x2);
6114
6115 else if (subopc == 6 /*SHL*/ && opc == 0x71)
6116 SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
6117 else if (subopc == 6 /*SHL*/ && opc == 0x72)
6118 SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
6119 else if (subopc == 6 /*SHL*/ && opc == 0x73)
6120 SHIFT_BY_IMM("psllq", Iop_Shl64);
6121
6122 else goto mmx_decode_failure;
6123
6124 # undef SHIFT_BY_IMM
6125 break;
6126 }
6127
6128 case 0xF7: {
6129 IRTemp addr = newTemp(Ity_I32);
6130 IRTemp regD = newTemp(Ity_I64);
6131 IRTemp regM = newTemp(Ity_I64);
6132 IRTemp mask = newTemp(Ity_I64);
6133 IRTemp olddata = newTemp(Ity_I64);
6134 IRTemp newdata = newTemp(Ity_I64);
6135
6136 modrm = getIByte(delta);
6137 if (sz != 4 || (!epartIsReg(modrm)))
6138 goto mmx_decode_failure;
6139 delta++;
6140
6141 assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) ));
6142 assign( regM, getMMXReg( eregOfRM(modrm) ));
6143 assign( regD, getMMXReg( gregOfRM(modrm) ));
6144 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
6145 assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
6146 assign( newdata,
6147 binop(Iop_Or64,
6148 binop(Iop_And64,
6149 mkexpr(regD),
6150 mkexpr(mask) ),
6151 binop(Iop_And64,
6152 mkexpr(olddata),
6153 unop(Iop_Not64, mkexpr(mask)))) );
6154 storeLE( mkexpr(addr), mkexpr(newdata) );
6155 DIP("maskmovq %s,%s\n", nameMMXReg( eregOfRM(modrm) ),
6156 nameMMXReg( gregOfRM(modrm) ) );
6157 break;
6158 }
6159
6160 /* --- MMX decode failure --- */
6161 default:
6162 mmx_decode_failure:
6163 *decode_ok = False;
6164 return delta; /* ignored */
6165
6166 }
6167
6168 *decode_ok = True;
6169 return delta;
6170 }
6171
6172
6173 /*------------------------------------------------------------*/
6174 /*--- More misc arithmetic and other obscure insns. ---*/
6175 /*------------------------------------------------------------*/
6176
6177 /* Double length left and right shifts. Apparently only required in
6178 v-size (no b- variant). */
6179 static
dis_SHLRD_Gv_Ev(UChar sorb,Int delta,UChar modrm,Int sz,IRExpr * shift_amt,Bool amt_is_literal,const HChar * shift_amt_txt,Bool left_shift)6180 UInt dis_SHLRD_Gv_Ev ( UChar sorb,
6181 Int delta, UChar modrm,
6182 Int sz,
6183 IRExpr* shift_amt,
6184 Bool amt_is_literal,
6185 const HChar* shift_amt_txt,
6186 Bool left_shift )
6187 {
6188 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used
6189 for printing it. And eip on entry points at the modrm byte. */
6190 Int len;
6191 HChar dis_buf[50];
6192
6193 IRType ty = szToITy(sz);
6194 IRTemp gsrc = newTemp(ty);
6195 IRTemp esrc = newTemp(ty);
6196 IRTemp addr = IRTemp_INVALID;
6197 IRTemp tmpSH = newTemp(Ity_I8);
6198 IRTemp tmpL = IRTemp_INVALID;
6199 IRTemp tmpRes = IRTemp_INVALID;
6200 IRTemp tmpSubSh = IRTemp_INVALID;
6201 IROp mkpair;
6202 IROp getres;
6203 IROp shift;
6204 IRExpr* mask = NULL;
6205
6206 vassert(sz == 2 || sz == 4);
6207
6208 /* The E-part is the destination; this is shifted. The G-part
6209 supplies bits to be shifted into the E-part, but is not
6210 changed.
6211
6212 If shifting left, form a double-length word with E at the top
6213 and G at the bottom, and shift this left. The result is then in
6214 the high part.
6215
6216 If shifting right, form a double-length word with G at the top
6217 and E at the bottom, and shift this right. The result is then
6218 at the bottom. */
6219
6220 /* Fetch the operands. */
6221
6222 assign( gsrc, getIReg(sz, gregOfRM(modrm)) );
6223
6224 if (epartIsReg(modrm)) {
6225 delta++;
6226 assign( esrc, getIReg(sz, eregOfRM(modrm)) );
6227 DIP("sh%cd%c %s, %s, %s\n",
6228 ( left_shift ? 'l' : 'r' ), nameISize(sz),
6229 shift_amt_txt,
6230 nameIReg(sz, gregOfRM(modrm)), nameIReg(sz, eregOfRM(modrm)));
6231 } else {
6232 addr = disAMode ( &len, sorb, delta, dis_buf );
6233 delta += len;
6234 assign( esrc, loadLE(ty, mkexpr(addr)) );
6235 DIP("sh%cd%c %s, %s, %s\n",
6236 ( left_shift ? 'l' : 'r' ), nameISize(sz),
6237 shift_amt_txt,
6238 nameIReg(sz, gregOfRM(modrm)), dis_buf);
6239 }
6240
6241 /* Round up the relevant primops. */
6242
6243 if (sz == 4) {
6244 tmpL = newTemp(Ity_I64);
6245 tmpRes = newTemp(Ity_I32);
6246 tmpSubSh = newTemp(Ity_I32);
6247 mkpair = Iop_32HLto64;
6248 getres = left_shift ? Iop_64HIto32 : Iop_64to32;
6249 shift = left_shift ? Iop_Shl64 : Iop_Shr64;
6250 mask = mkU8(31);
6251 } else {
6252 /* sz == 2 */
6253 tmpL = newTemp(Ity_I32);
6254 tmpRes = newTemp(Ity_I16);
6255 tmpSubSh = newTemp(Ity_I16);
6256 mkpair = Iop_16HLto32;
6257 getres = left_shift ? Iop_32HIto16 : Iop_32to16;
6258 shift = left_shift ? Iop_Shl32 : Iop_Shr32;
6259 mask = mkU8(15);
6260 }
6261
6262 /* Do the shift, calculate the subshift value, and set
6263 the flag thunk. */
6264
6265 assign( tmpSH, binop(Iop_And8, shift_amt, mask) );
6266
6267 if (left_shift)
6268 assign( tmpL, binop(mkpair, mkexpr(esrc), mkexpr(gsrc)) );
6269 else
6270 assign( tmpL, binop(mkpair, mkexpr(gsrc), mkexpr(esrc)) );
6271
6272 assign( tmpRes, unop(getres, binop(shift, mkexpr(tmpL), mkexpr(tmpSH)) ) );
6273 assign( tmpSubSh,
6274 unop(getres,
6275 binop(shift,
6276 mkexpr(tmpL),
6277 binop(Iop_And8,
6278 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
6279 mask))) );
6280
6281 setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl32 : Iop_Sar32,
6282 tmpRes, tmpSubSh, ty, tmpSH );
6283
6284 /* Put result back. */
6285
6286 if (epartIsReg(modrm)) {
6287 putIReg(sz, eregOfRM(modrm), mkexpr(tmpRes));
6288 } else {
6289 storeLE( mkexpr(addr), mkexpr(tmpRes) );
6290 }
6291
6292 if (amt_is_literal) delta++;
6293 return delta;
6294 }
6295
6296
6297 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not
6298 required. */
6299
/* Selects which member of the bit-test family is being translated. */
typedef
   enum {
      BtOpNone,   /* bt:  test only, memory/register not modified */
      BtOpSet,    /* bts: test, then set the bit */
      BtOpReset,  /* btr: test, then clear the bit */
      BtOpComp    /* btc: test, then complement the bit */
   }
   BtOp;
6301
nameBtOp(BtOp op)6302 static const HChar* nameBtOp ( BtOp op )
6303 {
6304 switch (op) {
6305 case BtOpNone: return "";
6306 case BtOpSet: return "s";
6307 case BtOpReset: return "r";
6308 case BtOpComp: return "c";
6309 default: vpanic("nameBtOp(x86)");
6310 }
6311 }
6312
6313
6314 static
dis_bt_G_E(const VexAbiInfo * vbi,UChar sorb,Bool locked,Int sz,Int delta,BtOp op)6315 UInt dis_bt_G_E ( const VexAbiInfo* vbi,
6316 UChar sorb, Bool locked, Int sz, Int delta, BtOp op )
6317 {
6318 HChar dis_buf[50];
6319 UChar modrm;
6320 Int len;
6321 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
6322 t_addr1, t_esp, t_mask, t_new;
6323
6324 vassert(sz == 2 || sz == 4);
6325
6326 t_fetched = t_bitno0 = t_bitno1 = t_bitno2
6327 = t_addr0 = t_addr1 = t_esp
6328 = t_mask = t_new = IRTemp_INVALID;
6329
6330 t_fetched = newTemp(Ity_I8);
6331 t_new = newTemp(Ity_I8);
6332 t_bitno0 = newTemp(Ity_I32);
6333 t_bitno1 = newTemp(Ity_I32);
6334 t_bitno2 = newTemp(Ity_I8);
6335 t_addr1 = newTemp(Ity_I32);
6336 modrm = getIByte(delta);
6337
6338 assign( t_bitno0, widenSto32(getIReg(sz, gregOfRM(modrm))) );
6339
6340 if (epartIsReg(modrm)) {
6341 delta++;
6342 /* Get it onto the client's stack. */
6343 t_esp = newTemp(Ity_I32);
6344 t_addr0 = newTemp(Ity_I32);
6345
6346 /* For the choice of the value 128, see comment in dis_bt_G_E in
6347 guest_amd64_toIR.c. We point out here only that 128 is
6348 fast-cased in Memcheck and is > 0, so seems like a good
6349 choice. */
6350 vassert(vbi->guest_stack_redzone_size == 0);
6351 assign( t_esp, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(128)) );
6352 putIReg(4, R_ESP, mkexpr(t_esp));
6353
6354 storeLE( mkexpr(t_esp), getIReg(sz, eregOfRM(modrm)) );
6355
6356 /* Make t_addr0 point at it. */
6357 assign( t_addr0, mkexpr(t_esp) );
6358
6359 /* Mask out upper bits of the shift amount, since we're doing a
6360 reg. */
6361 assign( t_bitno1, binop(Iop_And32,
6362 mkexpr(t_bitno0),
6363 mkU32(sz == 4 ? 31 : 15)) );
6364
6365 } else {
6366 t_addr0 = disAMode ( &len, sorb, delta, dis_buf );
6367 delta += len;
6368 assign( t_bitno1, mkexpr(t_bitno0) );
6369 }
6370
6371 /* At this point: t_addr0 is the address being operated on. If it
6372 was a reg, we will have pushed it onto the client's stack.
6373 t_bitno1 is the bit number, suitably masked in the case of a
6374 reg. */
6375
6376 /* Now the main sequence. */
6377 assign( t_addr1,
6378 binop(Iop_Add32,
6379 mkexpr(t_addr0),
6380 binop(Iop_Sar32, mkexpr(t_bitno1), mkU8(3))) );
6381
6382 /* t_addr1 now holds effective address */
6383
6384 assign( t_bitno2,
6385 unop(Iop_32to8,
6386 binop(Iop_And32, mkexpr(t_bitno1), mkU32(7))) );
6387
6388 /* t_bitno2 contains offset of bit within byte */
6389
6390 if (op != BtOpNone) {
6391 t_mask = newTemp(Ity_I8);
6392 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
6393 }
6394
6395 /* t_mask is now a suitable byte mask */
6396
6397 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );
6398
6399 if (op != BtOpNone) {
6400 switch (op) {
6401 case BtOpSet:
6402 assign( t_new,
6403 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
6404 break;
6405 case BtOpComp:
6406 assign( t_new,
6407 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
6408 break;
6409 case BtOpReset:
6410 assign( t_new,
6411 binop(Iop_And8, mkexpr(t_fetched),
6412 unop(Iop_Not8, mkexpr(t_mask))) );
6413 break;
6414 default:
6415 vpanic("dis_bt_G_E(x86)");
6416 }
6417 if (locked && !epartIsReg(modrm)) {
6418 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
6419 mkexpr(t_new)/*new*/,
6420 guest_EIP_curr_instr );
6421 } else {
6422 storeLE( mkexpr(t_addr1), mkexpr(t_new) );
6423 }
6424 }
6425
6426 /* Side effect done; now get selected bit into Carry flag */
6427 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
6428 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
6429 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
6430 stmt( IRStmt_Put(
6431 OFFB_CC_DEP1,
6432 binop(Iop_And32,
6433 binop(Iop_Shr32,
6434 unop(Iop_8Uto32, mkexpr(t_fetched)),
6435 mkexpr(t_bitno2)),
6436 mkU32(1)))
6437 );
6438 /* Set NDEP even though it isn't used. This makes redundant-PUT
6439 elimination of previous stores to this field work better. */
6440 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
6441
6442 /* Move reg operand from stack back to reg */
6443 if (epartIsReg(modrm)) {
6444 /* t_esp still points at it. */
6445 putIReg(sz, eregOfRM(modrm), loadLE(szToITy(sz), mkexpr(t_esp)) );
6446 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t_esp), mkU32(128)) );
6447 }
6448
6449 DIP("bt%s%c %s, %s\n",
6450 nameBtOp(op), nameISize(sz), nameIReg(sz, gregOfRM(modrm)),
6451 ( epartIsReg(modrm) ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ) );
6452
6453 return delta;
6454 }
6455
6456
6457
6458 /* Handle BSF/BSR. Only v-size seems necessary. */
6459 static
dis_bs_E_G(UChar sorb,Int sz,Int delta,Bool fwds)6460 UInt dis_bs_E_G ( UChar sorb, Int sz, Int delta, Bool fwds )
6461 {
6462 Bool isReg;
6463 UChar modrm;
6464 HChar dis_buf[50];
6465
6466 IRType ty = szToITy(sz);
6467 IRTemp src = newTemp(ty);
6468 IRTemp dst = newTemp(ty);
6469
6470 IRTemp src32 = newTemp(Ity_I32);
6471 IRTemp dst32 = newTemp(Ity_I32);
6472 IRTemp srcB = newTemp(Ity_I1);
6473
6474 vassert(sz == 4 || sz == 2);
6475
6476 modrm = getIByte(delta);
6477
6478 isReg = epartIsReg(modrm);
6479 if (isReg) {
6480 delta++;
6481 assign( src, getIReg(sz, eregOfRM(modrm)) );
6482 } else {
6483 Int len;
6484 IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
6485 delta += len;
6486 assign( src, loadLE(ty, mkexpr(addr)) );
6487 }
6488
6489 DIP("bs%c%c %s, %s\n",
6490 fwds ? 'f' : 'r', nameISize(sz),
6491 ( isReg ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ),
6492 nameIReg(sz, gregOfRM(modrm)));
6493
6494 /* Generate a bool expression which is zero iff the original is
6495 zero, and nonzero otherwise. Ask for a CmpNE version which, if
6496 instrumented by Memcheck, is instrumented expensively, since
6497 this may be used on the output of a preceding movmskb insn,
6498 which has been known to be partially defined, and in need of
6499 careful handling. */
6500 assign( srcB, binop(mkSizedOp(ty,Iop_ExpCmpNE8),
6501 mkexpr(src), mkU(ty,0)) );
6502
6503 /* Flags: Z is 1 iff source value is zero. All others
6504 are undefined -- we force them to zero. */
6505 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
6506 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
6507 stmt( IRStmt_Put(
6508 OFFB_CC_DEP1,
6509 IRExpr_ITE( mkexpr(srcB),
6510 /* src!=0 */
6511 mkU32(0),
6512 /* src==0 */
6513 mkU32(X86G_CC_MASK_Z)
6514 )
6515 ));
6516 /* Set NDEP even though it isn't used. This makes redundant-PUT
6517 elimination of previous stores to this field work better. */
6518 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
6519
6520 /* Result: iff source value is zero, we can't use
6521 Iop_Clz32/Iop_Ctz32 as they have no defined result in that case.
6522 But anyway, Intel x86 semantics say the result is undefined in
6523 such situations. Hence handle the zero case specially. */
6524
6525 /* Bleh. What we compute:
6526
6527 bsf32: if src == 0 then 0 else Ctz32(src)
6528 bsr32: if src == 0 then 0 else 31 - Clz32(src)
6529
6530 bsf16: if src == 0 then 0 else Ctz32(16Uto32(src))
6531 bsr16: if src == 0 then 0 else 31 - Clz32(16Uto32(src))
6532
6533 First, widen src to 32 bits if it is not already.
6534
6535 Postscript 15 Oct 04: it seems that at least VIA Nehemiah leaves the
6536 dst register unchanged when src == 0. Hence change accordingly.
6537 */
6538 if (sz == 2)
6539 assign( src32, unop(Iop_16Uto32, mkexpr(src)) );
6540 else
6541 assign( src32, mkexpr(src) );
6542
6543 /* The main computation, guarding against zero. */
6544 assign( dst32,
6545 IRExpr_ITE(
6546 mkexpr(srcB),
6547 /* src != 0 */
6548 fwds ? unop(Iop_Ctz32, mkexpr(src32))
6549 : binop(Iop_Sub32,
6550 mkU32(31),
6551 unop(Iop_Clz32, mkexpr(src32))),
6552 /* src == 0 -- leave dst unchanged */
6553 widenUto32( getIReg( sz, gregOfRM(modrm) ) )
6554 )
6555 );
6556
6557 if (sz == 2)
6558 assign( dst, unop(Iop_32to16, mkexpr(dst32)) );
6559 else
6560 assign( dst, mkexpr(dst32) );
6561
6562 /* dump result back */
6563 putIReg( sz, gregOfRM(modrm), mkexpr(dst) );
6564
6565 return delta;
6566 }
6567
6568
6569 static
codegen_xchg_eAX_Reg(Int sz,Int reg)6570 void codegen_xchg_eAX_Reg ( Int sz, Int reg )
6571 {
6572 IRType ty = szToITy(sz);
6573 IRTemp t1 = newTemp(ty);
6574 IRTemp t2 = newTemp(ty);
6575 vassert(sz == 2 || sz == 4);
6576 assign( t1, getIReg(sz, R_EAX) );
6577 assign( t2, getIReg(sz, reg) );
6578 putIReg( sz, R_EAX, mkexpr(t2) );
6579 putIReg( sz, reg, mkexpr(t1) );
6580 DIP("xchg%c %s, %s\n",
6581 nameISize(sz), nameIReg(sz, R_EAX), nameIReg(sz, reg));
6582 }
6583
6584
6585 static
codegen_SAHF(void)6586 void codegen_SAHF ( void )
6587 {
6588 /* Set the flags to:
6589 (x86g_calculate_flags_all() & X86G_CC_MASK_O) -- retain the old O flag
6590 | (%AH & (X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
6591 |X86G_CC_MASK_P|X86G_CC_MASK_C)
6592 */
6593 UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
6594 |X86G_CC_MASK_C|X86G_CC_MASK_P;
6595 IRTemp oldflags = newTemp(Ity_I32);
6596 assign( oldflags, mk_x86g_calculate_eflags_all() );
6597 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
6598 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
6599 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
6600 stmt( IRStmt_Put( OFFB_CC_DEP1,
6601 binop(Iop_Or32,
6602 binop(Iop_And32, mkexpr(oldflags), mkU32(X86G_CC_MASK_O)),
6603 binop(Iop_And32,
6604 binop(Iop_Shr32, getIReg(4, R_EAX), mkU8(8)),
6605 mkU32(mask_SZACP))
6606 )
6607 ));
6608 /* Set NDEP even though it isn't used. This makes redundant-PUT
6609 elimination of previous stores to this field work better. */
6610 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
6611 }
6612
6613
6614 static
codegen_LAHF(void)6615 void codegen_LAHF ( void )
6616 {
6617 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
6618 IRExpr* eax_with_hole;
6619 IRExpr* new_byte;
6620 IRExpr* new_eax;
6621 UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
6622 |X86G_CC_MASK_C|X86G_CC_MASK_P;
6623
6624 IRTemp flags = newTemp(Ity_I32);
6625 assign( flags, mk_x86g_calculate_eflags_all() );
6626
6627 eax_with_hole
6628 = binop(Iop_And32, getIReg(4, R_EAX), mkU32(0xFFFF00FF));
6629 new_byte
6630 = binop(Iop_Or32, binop(Iop_And32, mkexpr(flags), mkU32(mask_SZACP)),
6631 mkU32(1<<1));
6632 new_eax
6633 = binop(Iop_Or32, eax_with_hole,
6634 binop(Iop_Shl32, new_byte, mkU8(8)));
6635 putIReg(4, R_EAX, new_eax);
6636 }
6637
6638
6639 static
dis_cmpxchg_G_E(UChar sorb,Bool locked,Int size,Int delta0)6640 UInt dis_cmpxchg_G_E ( UChar sorb,
6641 Bool locked,
6642 Int size,
6643 Int delta0 )
6644 {
6645 HChar dis_buf[50];
6646 Int len;
6647
6648 IRType ty = szToITy(size);
6649 IRTemp acc = newTemp(ty);
6650 IRTemp src = newTemp(ty);
6651 IRTemp dest = newTemp(ty);
6652 IRTemp dest2 = newTemp(ty);
6653 IRTemp acc2 = newTemp(ty);
6654 IRTemp cond = newTemp(Ity_I1);
6655 IRTemp addr = IRTemp_INVALID;
6656 UChar rm = getUChar(delta0);
6657
6658 /* There are 3 cases to consider:
6659
6660 reg-reg: ignore any lock prefix, generate sequence based
6661 on ITE
6662
6663 reg-mem, not locked: ignore any lock prefix, generate sequence
6664 based on ITE
6665
6666 reg-mem, locked: use IRCAS
6667 */
6668 if (epartIsReg(rm)) {
6669 /* case 1 */
6670 assign( dest, getIReg(size, eregOfRM(rm)) );
6671 delta0++;
6672 assign( src, getIReg(size, gregOfRM(rm)) );
6673 assign( acc, getIReg(size, R_EAX) );
6674 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
6675 assign( cond, mk_x86g_calculate_condition(X86CondZ) );
6676 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
6677 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
6678 putIReg(size, R_EAX, mkexpr(acc2));
6679 putIReg(size, eregOfRM(rm), mkexpr(dest2));
6680 DIP("cmpxchg%c %s,%s\n", nameISize(size),
6681 nameIReg(size,gregOfRM(rm)),
6682 nameIReg(size,eregOfRM(rm)) );
6683 }
6684 else if (!epartIsReg(rm) && !locked) {
6685 /* case 2 */
6686 addr = disAMode ( &len, sorb, delta0, dis_buf );
6687 assign( dest, loadLE(ty, mkexpr(addr)) );
6688 delta0 += len;
6689 assign( src, getIReg(size, gregOfRM(rm)) );
6690 assign( acc, getIReg(size, R_EAX) );
6691 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
6692 assign( cond, mk_x86g_calculate_condition(X86CondZ) );
6693 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
6694 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
6695 putIReg(size, R_EAX, mkexpr(acc2));
6696 storeLE( mkexpr(addr), mkexpr(dest2) );
6697 DIP("cmpxchg%c %s,%s\n", nameISize(size),
6698 nameIReg(size,gregOfRM(rm)), dis_buf);
6699 }
6700 else if (!epartIsReg(rm) && locked) {
6701 /* case 3 */
6702 /* src is new value. acc is expected value. dest is old value.
6703 Compute success from the output of the IRCAS, and steer the
6704 new value for EAX accordingly: in case of success, EAX is
6705 unchanged. */
6706 addr = disAMode ( &len, sorb, delta0, dis_buf );
6707 delta0 += len;
6708 assign( src, getIReg(size, gregOfRM(rm)) );
6709 assign( acc, getIReg(size, R_EAX) );
6710 stmt( IRStmt_CAS(
6711 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
6712 NULL, mkexpr(acc), NULL, mkexpr(src) )
6713 ));
6714 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
6715 assign( cond, mk_x86g_calculate_condition(X86CondZ) );
6716 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
6717 putIReg(size, R_EAX, mkexpr(acc2));
6718 DIP("cmpxchg%c %s,%s\n", nameISize(size),
6719 nameIReg(size,gregOfRM(rm)), dis_buf);
6720 }
6721 else vassert(0);
6722
6723 return delta0;
6724 }
6725
6726
6727 /* Handle conditional move instructions of the form
6728 cmovcc E(reg-or-mem), G(reg)
6729
6730 E(src) is reg-or-mem
6731 G(dst) is reg.
6732
6733 If E is reg, --> GET %E, tmps
6734 GET %G, tmpd
6735 CMOVcc tmps, tmpd
6736 PUT tmpd, %G
6737
6738 If E is mem --> (getAddr E) -> tmpa
6739 LD (tmpa), tmps
6740 GET %G, tmpd
6741 CMOVcc tmps, tmpd
6742 PUT tmpd, %G
6743 */
6744 static
dis_cmov_E_G(UChar sorb,Int sz,X86Condcode cond,Int delta0)6745 UInt dis_cmov_E_G ( UChar sorb,
6746 Int sz,
6747 X86Condcode cond,
6748 Int delta0 )
6749 {
6750 UChar rm = getIByte(delta0);
6751 HChar dis_buf[50];
6752 Int len;
6753
6754 IRType ty = szToITy(sz);
6755 IRTemp tmps = newTemp(ty);
6756 IRTemp tmpd = newTemp(ty);
6757
6758 if (epartIsReg(rm)) {
6759 assign( tmps, getIReg(sz, eregOfRM(rm)) );
6760 assign( tmpd, getIReg(sz, gregOfRM(rm)) );
6761
6762 putIReg(sz, gregOfRM(rm),
6763 IRExpr_ITE( mk_x86g_calculate_condition(cond),
6764 mkexpr(tmps),
6765 mkexpr(tmpd) )
6766 );
6767 DIP("cmov%c%s %s,%s\n", nameISize(sz),
6768 name_X86Condcode(cond),
6769 nameIReg(sz,eregOfRM(rm)),
6770 nameIReg(sz,gregOfRM(rm)));
6771 return 1+delta0;
6772 }
6773
6774 /* E refers to memory */
6775 {
6776 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
6777 assign( tmps, loadLE(ty, mkexpr(addr)) );
6778 assign( tmpd, getIReg(sz, gregOfRM(rm)) );
6779
6780 putIReg(sz, gregOfRM(rm),
6781 IRExpr_ITE( mk_x86g_calculate_condition(cond),
6782 mkexpr(tmps),
6783 mkexpr(tmpd) )
6784 );
6785
6786 DIP("cmov%c%s %s,%s\n", nameISize(sz),
6787 name_X86Condcode(cond),
6788 dis_buf,
6789 nameIReg(sz,gregOfRM(rm)));
6790 return len+delta0;
6791 }
6792 }
6793
6794
6795 static
dis_xadd_G_E(UChar sorb,Bool locked,Int sz,Int delta0,Bool * decodeOK)6796 UInt dis_xadd_G_E ( UChar sorb, Bool locked, Int sz, Int delta0,
6797 Bool* decodeOK )
6798 {
6799 Int len;
6800 UChar rm = getIByte(delta0);
6801 HChar dis_buf[50];
6802
6803 IRType ty = szToITy(sz);
6804 IRTemp tmpd = newTemp(ty);
6805 IRTemp tmpt0 = newTemp(ty);
6806 IRTemp tmpt1 = newTemp(ty);
6807
6808 /* There are 3 cases to consider:
6809
6810 reg-reg: ignore any lock prefix,
6811 generate 'naive' (non-atomic) sequence
6812
6813 reg-mem, not locked: ignore any lock prefix, generate 'naive'
6814 (non-atomic) sequence
6815
6816 reg-mem, locked: use IRCAS
6817 */
6818
6819 if (epartIsReg(rm)) {
6820 /* case 1 */
6821 assign( tmpd, getIReg(sz, eregOfRM(rm)));
6822 assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
6823 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
6824 mkexpr(tmpd), mkexpr(tmpt0)) );
6825 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
6826 putIReg(sz, eregOfRM(rm), mkexpr(tmpt1));
6827 putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
6828 DIP("xadd%c %s, %s\n",
6829 nameISize(sz), nameIReg(sz,gregOfRM(rm)),
6830 nameIReg(sz,eregOfRM(rm)));
6831 *decodeOK = True;
6832 return 1+delta0;
6833 }
6834 else if (!epartIsReg(rm) && !locked) {
6835 /* case 2 */
6836 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
6837 assign( tmpd, loadLE(ty, mkexpr(addr)) );
6838 assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
6839 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
6840 mkexpr(tmpd), mkexpr(tmpt0)) );
6841 storeLE( mkexpr(addr), mkexpr(tmpt1) );
6842 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
6843 putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
6844 DIP("xadd%c %s, %s\n",
6845 nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
6846 *decodeOK = True;
6847 return len+delta0;
6848 }
6849 else if (!epartIsReg(rm) && locked) {
6850 /* case 3 */
6851 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
6852 assign( tmpd, loadLE(ty, mkexpr(addr)) );
6853 assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
6854 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
6855 mkexpr(tmpd), mkexpr(tmpt0)) );
6856 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
6857 mkexpr(tmpt1)/*newVal*/, guest_EIP_curr_instr );
6858 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
6859 putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
6860 DIP("xadd%c %s, %s\n",
6861 nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
6862 *decodeOK = True;
6863 return len+delta0;
6864 }
6865 /*UNREACHED*/
6866 vassert(0);
6867 }
6868
6869 /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
6870
6871 static
dis_mov_Ew_Sw(UChar sorb,Int delta0)6872 UInt dis_mov_Ew_Sw ( UChar sorb, Int delta0 )
6873 {
6874 Int len;
6875 IRTemp addr;
6876 UChar rm = getIByte(delta0);
6877 HChar dis_buf[50];
6878
6879 if (epartIsReg(rm)) {
6880 putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
6881 DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
6882 return 1+delta0;
6883 } else {
6884 addr = disAMode ( &len, sorb, delta0, dis_buf );
6885 putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
6886 DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
6887 return len+delta0;
6888 }
6889 }
6890
6891 /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
6892 dst is ireg and sz==4, zero out top half of it. */
6893
6894 static
dis_mov_Sw_Ew(UChar sorb,Int sz,Int delta0)6895 UInt dis_mov_Sw_Ew ( UChar sorb,
6896 Int sz,
6897 Int delta0 )
6898 {
6899 Int len;
6900 IRTemp addr;
6901 UChar rm = getIByte(delta0);
6902 HChar dis_buf[50];
6903
6904 vassert(sz == 2 || sz == 4);
6905
6906 if (epartIsReg(rm)) {
6907 if (sz == 4)
6908 putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
6909 else
6910 putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
6911
6912 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
6913 return 1+delta0;
6914 } else {
6915 addr = disAMode ( &len, sorb, delta0, dis_buf );
6916 storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
6917 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
6918 return len+delta0;
6919 }
6920 }
6921
6922
6923 static
dis_push_segreg(UInt sreg,Int sz)6924 void dis_push_segreg ( UInt sreg, Int sz )
6925 {
6926 IRTemp t1 = newTemp(Ity_I16);
6927 IRTemp ta = newTemp(Ity_I32);
6928 vassert(sz == 2 || sz == 4);
6929
6930 assign( t1, getSReg(sreg) );
6931 assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
6932 putIReg(4, R_ESP, mkexpr(ta));
6933 storeLE( mkexpr(ta), mkexpr(t1) );
6934
6935 DIP("push%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg));
6936 }
6937
6938 static
dis_pop_segreg(UInt sreg,Int sz)6939 void dis_pop_segreg ( UInt sreg, Int sz )
6940 {
6941 IRTemp t1 = newTemp(Ity_I16);
6942 IRTemp ta = newTemp(Ity_I32);
6943 vassert(sz == 2 || sz == 4);
6944
6945 assign( ta, getIReg(4, R_ESP) );
6946 assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
6947
6948 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
6949 putSReg( sreg, mkexpr(t1) );
6950 DIP("pop%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg));
6951 }
6952
6953 static
dis_ret(DisResult * dres,UInt d32)6954 void dis_ret ( /*MOD*/DisResult* dres, UInt d32 )
6955 {
6956 IRTemp t1 = newTemp(Ity_I32);
6957 IRTemp t2 = newTemp(Ity_I32);
6958 assign(t1, getIReg(4,R_ESP));
6959 assign(t2, loadLE(Ity_I32,mkexpr(t1)));
6960 putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(4+d32)));
6961 jmp_treg(dres, Ijk_Ret, t2);
6962 vassert(dres->whatNext == Dis_StopHere);
6963 }
6964
6965 /*------------------------------------------------------------*/
6966 /*--- SSE/SSE2/SSE3 helpers ---*/
6967 /*------------------------------------------------------------*/
6968
6969 /* Indicates whether the op requires a rounding-mode argument. Note
6970 that this covers only vector floating point arithmetic ops, and
6971 omits the scalar ones that need rounding modes. Note also that
6972 inconsistencies here will get picked up later by the IR sanity
6973 checker, so this isn't correctness-critical. */
requiresRMode(IROp op)6974 static Bool requiresRMode ( IROp op )
6975 {
6976 switch (op) {
6977 /* 128 bit ops */
6978 case Iop_Add32Fx4: case Iop_Sub32Fx4:
6979 case Iop_Mul32Fx4: case Iop_Div32Fx4:
6980 case Iop_Add64Fx2: case Iop_Sub64Fx2:
6981 case Iop_Mul64Fx2: case Iop_Div64Fx2:
6982 return True;
6983 default:
6984 break;
6985 }
6986 return False;
6987 }
6988
6989
6990 /* Worker function; do not call directly.
6991 Handles full width G = G `op` E and G = (not G) `op` E.
6992 */
6993
dis_SSE_E_to_G_all_wrk(UChar sorb,Int delta,const HChar * opname,IROp op,Bool invertG)6994 static UInt dis_SSE_E_to_G_all_wrk (
6995 UChar sorb, Int delta,
6996 const HChar* opname, IROp op,
6997 Bool invertG
6998 )
6999 {
7000 HChar dis_buf[50];
7001 Int alen;
7002 IRTemp addr;
7003 UChar rm = getIByte(delta);
7004 IRExpr* gpart
7005 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRM(rm)))
7006 : getXMMReg(gregOfRM(rm));
7007 if (epartIsReg(rm)) {
7008 putXMMReg(
7009 gregOfRM(rm),
7010 requiresRMode(op)
7011 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
7012 gpart,
7013 getXMMReg(eregOfRM(rm)))
7014 : binop(op, gpart,
7015 getXMMReg(eregOfRM(rm)))
7016 );
7017 DIP("%s %s,%s\n", opname,
7018 nameXMMReg(eregOfRM(rm)),
7019 nameXMMReg(gregOfRM(rm)) );
7020 return delta+1;
7021 } else {
7022 addr = disAMode ( &alen, sorb, delta, dis_buf );
7023 putXMMReg(
7024 gregOfRM(rm),
7025 requiresRMode(op)
7026 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
7027 gpart,
7028 loadLE(Ity_V128, mkexpr(addr)))
7029 : binop(op, gpart,
7030 loadLE(Ity_V128, mkexpr(addr)))
7031 );
7032 DIP("%s %s,%s\n", opname,
7033 dis_buf,
7034 nameXMMReg(gregOfRM(rm)) );
7035 return delta+alen;
7036 }
7037 }
7038
7039
7040 /* All lanes SSE binary operation, G = G `op` E. */
7041
7042 static
dis_SSE_E_to_G_all(UChar sorb,Int delta,const HChar * opname,IROp op)7043 UInt dis_SSE_E_to_G_all ( UChar sorb, Int delta, const HChar* opname, IROp op )
7044 {
7045 return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, False );
7046 }
7047
7048 /* All lanes SSE binary operation, G = (not G) `op` E. */
7049
7050 static
dis_SSE_E_to_G_all_invG(UChar sorb,Int delta,const HChar * opname,IROp op)7051 UInt dis_SSE_E_to_G_all_invG ( UChar sorb, Int delta,
7052 const HChar* opname, IROp op )
7053 {
7054 return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, True );
7055 }
7056
7057
7058 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
7059
dis_SSE_E_to_G_lo32(UChar sorb,Int delta,const HChar * opname,IROp op)7060 static UInt dis_SSE_E_to_G_lo32 ( UChar sorb, Int delta,
7061 const HChar* opname, IROp op )
7062 {
7063 HChar dis_buf[50];
7064 Int alen;
7065 IRTemp addr;
7066 UChar rm = getIByte(delta);
7067 IRExpr* gpart = getXMMReg(gregOfRM(rm));
7068 if (epartIsReg(rm)) {
7069 putXMMReg( gregOfRM(rm),
7070 binop(op, gpart,
7071 getXMMReg(eregOfRM(rm))) );
7072 DIP("%s %s,%s\n", opname,
7073 nameXMMReg(eregOfRM(rm)),
7074 nameXMMReg(gregOfRM(rm)) );
7075 return delta+1;
7076 } else {
7077 /* We can only do a 32-bit memory read, so the upper 3/4 of the
7078 E operand needs to be made simply of zeroes. */
7079 IRTemp epart = newTemp(Ity_V128);
7080 addr = disAMode ( &alen, sorb, delta, dis_buf );
7081 assign( epart, unop( Iop_32UtoV128,
7082 loadLE(Ity_I32, mkexpr(addr))) );
7083 putXMMReg( gregOfRM(rm),
7084 binop(op, gpart, mkexpr(epart)) );
7085 DIP("%s %s,%s\n", opname,
7086 dis_buf,
7087 nameXMMReg(gregOfRM(rm)) );
7088 return delta+alen;
7089 }
7090 }
7091
7092
7093 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
7094
dis_SSE_E_to_G_lo64(UChar sorb,Int delta,const HChar * opname,IROp op)7095 static UInt dis_SSE_E_to_G_lo64 ( UChar sorb, Int delta,
7096 const HChar* opname, IROp op )
7097 {
7098 HChar dis_buf[50];
7099 Int alen;
7100 IRTemp addr;
7101 UChar rm = getIByte(delta);
7102 IRExpr* gpart = getXMMReg(gregOfRM(rm));
7103 if (epartIsReg(rm)) {
7104 putXMMReg( gregOfRM(rm),
7105 binop(op, gpart,
7106 getXMMReg(eregOfRM(rm))) );
7107 DIP("%s %s,%s\n", opname,
7108 nameXMMReg(eregOfRM(rm)),
7109 nameXMMReg(gregOfRM(rm)) );
7110 return delta+1;
7111 } else {
7112 /* We can only do a 64-bit memory read, so the upper half of the
7113 E operand needs to be made simply of zeroes. */
7114 IRTemp epart = newTemp(Ity_V128);
7115 addr = disAMode ( &alen, sorb, delta, dis_buf );
7116 assign( epart, unop( Iop_64UtoV128,
7117 loadLE(Ity_I64, mkexpr(addr))) );
7118 putXMMReg( gregOfRM(rm),
7119 binop(op, gpart, mkexpr(epart)) );
7120 DIP("%s %s,%s\n", opname,
7121 dis_buf,
7122 nameXMMReg(gregOfRM(rm)) );
7123 return delta+alen;
7124 }
7125 }
7126
7127
7128 /* All lanes unary SSE operation, G = op(E). */
7129
dis_SSE_E_to_G_unary_all(UChar sorb,Int delta,const HChar * opname,IROp op)7130 static UInt dis_SSE_E_to_G_unary_all (
7131 UChar sorb, Int delta,
7132 const HChar* opname, IROp op
7133 )
7134 {
7135 HChar dis_buf[50];
7136 Int alen;
7137 IRTemp addr;
7138 UChar rm = getIByte(delta);
7139 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
7140 // up in the usual way.
7141 Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
7142 if (epartIsReg(rm)) {
7143 IRExpr* src = getXMMReg(eregOfRM(rm));
7144 /* XXXROUNDINGFIXME */
7145 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
7146 : unop(op, src);
7147 putXMMReg( gregOfRM(rm), res );
7148 DIP("%s %s,%s\n", opname,
7149 nameXMMReg(eregOfRM(rm)),
7150 nameXMMReg(gregOfRM(rm)) );
7151 return delta+1;
7152 } else {
7153 addr = disAMode ( &alen, sorb, delta, dis_buf );
7154 IRExpr* src = loadLE(Ity_V128, mkexpr(addr));
7155 /* XXXROUNDINGFIXME */
7156 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
7157 : unop(op, src);
7158 putXMMReg( gregOfRM(rm), res );
7159 DIP("%s %s,%s\n", opname,
7160 dis_buf,
7161 nameXMMReg(gregOfRM(rm)) );
7162 return delta+alen;
7163 }
7164 }
7165
7166
7167 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */
7168
dis_SSE_E_to_G_unary_lo32(UChar sorb,Int delta,const HChar * opname,IROp op)7169 static UInt dis_SSE_E_to_G_unary_lo32 (
7170 UChar sorb, Int delta,
7171 const HChar* opname, IROp op
7172 )
7173 {
7174 /* First we need to get the old G value and patch the low 32 bits
7175 of the E operand into it. Then apply op and write back to G. */
7176 HChar dis_buf[50];
7177 Int alen;
7178 IRTemp addr;
7179 UChar rm = getIByte(delta);
7180 IRTemp oldG0 = newTemp(Ity_V128);
7181 IRTemp oldG1 = newTemp(Ity_V128);
7182
7183 assign( oldG0, getXMMReg(gregOfRM(rm)) );
7184
7185 if (epartIsReg(rm)) {
7186 assign( oldG1,
7187 binop( Iop_SetV128lo32,
7188 mkexpr(oldG0),
7189 getXMMRegLane32(eregOfRM(rm), 0)) );
7190 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
7191 DIP("%s %s,%s\n", opname,
7192 nameXMMReg(eregOfRM(rm)),
7193 nameXMMReg(gregOfRM(rm)) );
7194 return delta+1;
7195 } else {
7196 addr = disAMode ( &alen, sorb, delta, dis_buf );
7197 assign( oldG1,
7198 binop( Iop_SetV128lo32,
7199 mkexpr(oldG0),
7200 loadLE(Ity_I32, mkexpr(addr)) ));
7201 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
7202 DIP("%s %s,%s\n", opname,
7203 dis_buf,
7204 nameXMMReg(gregOfRM(rm)) );
7205 return delta+alen;
7206 }
7207 }
7208
7209
7210 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */
7211
dis_SSE_E_to_G_unary_lo64(UChar sorb,Int delta,const HChar * opname,IROp op)7212 static UInt dis_SSE_E_to_G_unary_lo64 (
7213 UChar sorb, Int delta,
7214 const HChar* opname, IROp op
7215 )
7216 {
7217 /* First we need to get the old G value and patch the low 64 bits
7218 of the E operand into it. Then apply op and write back to G. */
7219 HChar dis_buf[50];
7220 Int alen;
7221 IRTemp addr;
7222 UChar rm = getIByte(delta);
7223 IRTemp oldG0 = newTemp(Ity_V128);
7224 IRTemp oldG1 = newTemp(Ity_V128);
7225
7226 assign( oldG0, getXMMReg(gregOfRM(rm)) );
7227
7228 if (epartIsReg(rm)) {
7229 assign( oldG1,
7230 binop( Iop_SetV128lo64,
7231 mkexpr(oldG0),
7232 getXMMRegLane64(eregOfRM(rm), 0)) );
7233 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
7234 DIP("%s %s,%s\n", opname,
7235 nameXMMReg(eregOfRM(rm)),
7236 nameXMMReg(gregOfRM(rm)) );
7237 return delta+1;
7238 } else {
7239 addr = disAMode ( &alen, sorb, delta, dis_buf );
7240 assign( oldG1,
7241 binop( Iop_SetV128lo64,
7242 mkexpr(oldG0),
7243 loadLE(Ity_I64, mkexpr(addr)) ));
7244 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
7245 DIP("%s %s,%s\n", opname,
7246 dis_buf,
7247 nameXMMReg(gregOfRM(rm)) );
7248 return delta+alen;
7249 }
7250 }
7251
7252
7253 /* SSE integer binary operation:
7254 G = G `op` E (eLeft == False)
7255 G = E `op` G (eLeft == True)
7256 */
dis_SSEint_E_to_G(UChar sorb,Int delta,const HChar * opname,IROp op,Bool eLeft)7257 static UInt dis_SSEint_E_to_G(
7258 UChar sorb, Int delta,
7259 const HChar* opname, IROp op,
7260 Bool eLeft
7261 )
7262 {
7263 HChar dis_buf[50];
7264 Int alen;
7265 IRTemp addr;
7266 UChar rm = getIByte(delta);
7267 IRExpr* gpart = getXMMReg(gregOfRM(rm));
7268 IRExpr* epart = NULL;
7269 if (epartIsReg(rm)) {
7270 epart = getXMMReg(eregOfRM(rm));
7271 DIP("%s %s,%s\n", opname,
7272 nameXMMReg(eregOfRM(rm)),
7273 nameXMMReg(gregOfRM(rm)) );
7274 delta += 1;
7275 } else {
7276 addr = disAMode ( &alen, sorb, delta, dis_buf );
7277 epart = loadLE(Ity_V128, mkexpr(addr));
7278 DIP("%s %s,%s\n", opname,
7279 dis_buf,
7280 nameXMMReg(gregOfRM(rm)) );
7281 delta += alen;
7282 }
7283 putXMMReg( gregOfRM(rm),
7284 eLeft ? binop(op, epart, gpart)
7285 : binop(op, gpart, epart) );
7286 return delta;
7287 }
7288
7289
7290 /* Helper for doing SSE FP comparisons. */
7291
static void findSSECmpOp ( Bool* needNot, IROp* op,
                           Int imm8, Bool all_lanes, Int sz )
{
   /* Lookup table indexed by [sz == 8][all_lanes][predicate], where
      predicate 0..3 selects EQ, LT, LE, UN respectively. */
   static const IROp cmpOps[2][2][4] = {
      { /* sz == 4 */
        { Iop_CmpEQ32F0x4, Iop_CmpLT32F0x4,
          Iop_CmpLE32F0x4, Iop_CmpUN32F0x4 },   /* lowest lane only */
        { Iop_CmpEQ32Fx4,  Iop_CmpLT32Fx4,
          Iop_CmpLE32Fx4,  Iop_CmpUN32Fx4  }    /* all lanes */
      },
      { /* sz == 8 */
        { Iop_CmpEQ64F0x2, Iop_CmpLT64F0x2,
          Iop_CmpLE64F0x2, Iop_CmpUN64F0x2 },   /* lowest lane only */
        { Iop_CmpEQ64Fx2,  Iop_CmpLT64Fx2,
          Iop_CmpLE64Fx2,  Iop_CmpUN64Fx2  }    /* all lanes */
      }
   };
   Int szIx;

   /* Only the bottom three bits of the immediate are used.  Values
      4..7 are the negations of 0..3: report that through *needNot
      and reduce to the base predicate. */
   imm8 &= 7;
   *needNot = False;
   *op      = Iop_INVALID;
   if (imm8 >= 4) {
      *needNot = True;
      imm8 -= 4;
   }

   szIx = (sz == 4) ? 0 : (sz == 8) ? 1 : -1;
   if (szIx >= 0) {
      *op = cmpOps[szIx][all_lanes ? 1 : 0][imm8];
      return;
   }

   /* Unsupported operand size. */
   vpanic("findSSECmpOp(x86,guest)");
}
7341
7342 /* Handles SSE 32F/64F comparisons. */
7343
dis_SSEcmp_E_to_G(UChar sorb,Int delta,const HChar * opname,Bool all_lanes,Int sz)7344 static UInt dis_SSEcmp_E_to_G ( UChar sorb, Int delta,
7345 const HChar* opname, Bool all_lanes, Int sz )
7346 {
7347 HChar dis_buf[50];
7348 Int alen, imm8;
7349 IRTemp addr;
7350 Bool needNot = False;
7351 IROp op = Iop_INVALID;
7352 IRTemp plain = newTemp(Ity_V128);
7353 UChar rm = getIByte(delta);
7354 UShort mask = 0;
7355 vassert(sz == 4 || sz == 8);
7356 if (epartIsReg(rm)) {
7357 imm8 = getIByte(delta+1);
7358 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
7359 assign( plain, binop(op, getXMMReg(gregOfRM(rm)),
7360 getXMMReg(eregOfRM(rm))) );
7361 delta += 2;
7362 DIP("%s $%d,%s,%s\n", opname,
7363 imm8,
7364 nameXMMReg(eregOfRM(rm)),
7365 nameXMMReg(gregOfRM(rm)) );
7366 } else {
7367 addr = disAMode ( &alen, sorb, delta, dis_buf );
7368 imm8 = getIByte(delta+alen);
7369 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
7370 assign( plain,
7371 binop(
7372 op,
7373 getXMMReg(gregOfRM(rm)),
7374 all_lanes ? loadLE(Ity_V128, mkexpr(addr))
7375 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
7376 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
7377 )
7378 );
7379 delta += alen+1;
7380 DIP("%s $%d,%s,%s\n", opname,
7381 imm8,
7382 dis_buf,
7383 nameXMMReg(gregOfRM(rm)) );
7384 }
7385
7386 if (needNot && all_lanes) {
7387 putXMMReg( gregOfRM(rm),
7388 unop(Iop_NotV128, mkexpr(plain)) );
7389 }
7390 else
7391 if (needNot && !all_lanes) {
7392 mask = toUShort( sz==4 ? 0x000F : 0x00FF );
7393 putXMMReg( gregOfRM(rm),
7394 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
7395 }
7396 else {
7397 putXMMReg( gregOfRM(rm), mkexpr(plain) );
7398 }
7399
7400 return delta;
7401 }
7402
7403
7404 /* Vector by scalar shift of G by the amount specified at the bottom
7405 of E. */
7406
dis_SSE_shiftG_byE(UChar sorb,Int delta,const HChar * opname,IROp op)7407 static UInt dis_SSE_shiftG_byE ( UChar sorb, Int delta,
7408 const HChar* opname, IROp op )
7409 {
7410 HChar dis_buf[50];
7411 Int alen, size;
7412 IRTemp addr;
7413 Bool shl, shr, sar;
7414 UChar rm = getIByte(delta);
7415 IRTemp g0 = newTemp(Ity_V128);
7416 IRTemp g1 = newTemp(Ity_V128);
7417 IRTemp amt = newTemp(Ity_I32);
7418 IRTemp amt8 = newTemp(Ity_I8);
7419 if (epartIsReg(rm)) {
7420 assign( amt, getXMMRegLane32(eregOfRM(rm), 0) );
7421 DIP("%s %s,%s\n", opname,
7422 nameXMMReg(eregOfRM(rm)),
7423 nameXMMReg(gregOfRM(rm)) );
7424 delta++;
7425 } else {
7426 addr = disAMode ( &alen, sorb, delta, dis_buf );
7427 assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
7428 DIP("%s %s,%s\n", opname,
7429 dis_buf,
7430 nameXMMReg(gregOfRM(rm)) );
7431 delta += alen;
7432 }
7433 assign( g0, getXMMReg(gregOfRM(rm)) );
7434 assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
7435
7436 shl = shr = sar = False;
7437 size = 0;
7438 switch (op) {
7439 case Iop_ShlN16x8: shl = True; size = 32; break;
7440 case Iop_ShlN32x4: shl = True; size = 32; break;
7441 case Iop_ShlN64x2: shl = True; size = 64; break;
7442 case Iop_SarN16x8: sar = True; size = 16; break;
7443 case Iop_SarN32x4: sar = True; size = 32; break;
7444 case Iop_ShrN16x8: shr = True; size = 16; break;
7445 case Iop_ShrN32x4: shr = True; size = 32; break;
7446 case Iop_ShrN64x2: shr = True; size = 64; break;
7447 default: vassert(0);
7448 }
7449
7450 if (shl || shr) {
7451 assign(
7452 g1,
7453 IRExpr_ITE(
7454 binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
7455 binop(op, mkexpr(g0), mkexpr(amt8)),
7456 mkV128(0x0000)
7457 )
7458 );
7459 } else
7460 if (sar) {
7461 assign(
7462 g1,
7463 IRExpr_ITE(
7464 binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
7465 binop(op, mkexpr(g0), mkexpr(amt8)),
7466 binop(op, mkexpr(g0), mkU8(size-1))
7467 )
7468 );
7469 } else {
7470 /*NOTREACHED*/
7471 vassert(0);
7472 }
7473
7474 putXMMReg( gregOfRM(rm), mkexpr(g1) );
7475 return delta;
7476 }
7477
7478
7479 /* Vector by scalar shift of E by an immediate byte. */
7480
7481 static
dis_SSE_shiftE_imm(Int delta,const HChar * opname,IROp op)7482 UInt dis_SSE_shiftE_imm ( Int delta, const HChar* opname, IROp op )
7483 {
7484 Bool shl, shr, sar;
7485 UChar rm = getIByte(delta);
7486 IRTemp e0 = newTemp(Ity_V128);
7487 IRTemp e1 = newTemp(Ity_V128);
7488 UChar amt, size;
7489 vassert(epartIsReg(rm));
7490 vassert(gregOfRM(rm) == 2
7491 || gregOfRM(rm) == 4 || gregOfRM(rm) == 6);
7492 amt = getIByte(delta+1);
7493 delta += 2;
7494 DIP("%s $%d,%s\n", opname,
7495 (Int)amt,
7496 nameXMMReg(eregOfRM(rm)) );
7497 assign( e0, getXMMReg(eregOfRM(rm)) );
7498
7499 shl = shr = sar = False;
7500 size = 0;
7501 switch (op) {
7502 case Iop_ShlN16x8: shl = True; size = 16; break;
7503 case Iop_ShlN32x4: shl = True; size = 32; break;
7504 case Iop_ShlN64x2: shl = True; size = 64; break;
7505 case Iop_SarN16x8: sar = True; size = 16; break;
7506 case Iop_SarN32x4: sar = True; size = 32; break;
7507 case Iop_ShrN16x8: shr = True; size = 16; break;
7508 case Iop_ShrN32x4: shr = True; size = 32; break;
7509 case Iop_ShrN64x2: shr = True; size = 64; break;
7510 default: vassert(0);
7511 }
7512
7513 if (shl || shr) {
7514 assign( e1, amt >= size
7515 ? mkV128(0x0000)
7516 : binop(op, mkexpr(e0), mkU8(amt))
7517 );
7518 } else
7519 if (sar) {
7520 assign( e1, amt >= size
7521 ? binop(op, mkexpr(e0), mkU8(size-1))
7522 : binop(op, mkexpr(e0), mkU8(amt))
7523 );
7524 } else {
7525 /*NOTREACHED*/
7526 vassert(0);
7527 }
7528
7529 putXMMReg( eregOfRM(rm), mkexpr(e1) );
7530 return delta;
7531 }
7532
7533
7534 /* Get the current SSE rounding mode. */
7535
get_sse_roundingmode(void)7536 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
7537 {
7538 return binop( Iop_And32,
7539 IRExpr_Get( OFFB_SSEROUND, Ity_I32 ),
7540 mkU32(3) );
7541 }
7542
static void put_sse_roundingmode ( IRExpr* sseround )
{
   /* The new value must be a 32-bit integer expression; it is
      written unmodified to the guest state field at OFFB_SSEROUND. */
   IRStmt* writeField;
   vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
   writeField = IRStmt_Put( OFFB_SSEROUND, sseround );
   stmt( writeField );
}
7548
7549 /* Break a 128-bit value up into four 32-bit ints. */
7550
static void breakup128to32s ( IRTemp t128,
                              /*OUTs*/
                              IRTemp* t3, IRTemp* t2,
                              IRTemp* t1, IRTemp* t0 )
{
   /* Split t128 into its upper and lower 64-bit halves first, then
      each half into two 32-bit pieces.  On return *t0 holds the
      least significant 32 bits and *t3 the most significant.  Every
      output pointer must be non-NULL and must currently hold
      IRTemp_INVALID; fresh Ity_I32 temporaries are allocated here. */
   IRTemp upper = newTemp(Ity_I64);
   IRTemp lower = newTemp(Ity_I64);
   assign( upper, unop(Iop_V128HIto64, mkexpr(t128)) );
   assign( lower, unop(Iop_V128to64,   mkexpr(t128)) );

   vassert(t0 && *t0 == IRTemp_INVALID);
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);

   /* Lower half -> t0 (low word), t1 (high word). */
   *t0 = newTemp(Ity_I32);
   assign( *t0, unop(Iop_64to32,   mkexpr(lower)) );
   *t1 = newTemp(Ity_I32);
   assign( *t1, unop(Iop_64HIto32, mkexpr(lower)) );

   /* Upper half -> t2 (low word), t3 (high word). */
   *t2 = newTemp(Ity_I32);
   assign( *t2, unop(Iop_64to32,   mkexpr(upper)) );
   *t3 = newTemp(Ity_I32);
   assign( *t3, unop(Iop_64HIto32, mkexpr(upper)) );
}
7575
7576 /* Construct a 128-bit value from four 32-bit ints. */
7577
mk128from32s(IRTemp t3,IRTemp t2,IRTemp t1,IRTemp t0)7578 static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2,
7579 IRTemp t1, IRTemp t0 )
7580 {
7581 return
7582 binop( Iop_64HLtoV128,
7583 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
7584 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
7585 );
7586 }
7587
7588 /* Break a 64-bit value up into four 16-bit ints. */
7589
static void breakup64to16s ( IRTemp t64,
                             /*OUTs*/
                             IRTemp* t3, IRTemp* t2,
                             IRTemp* t1, IRTemp* t0 )
{
   /* Split t64 into its upper and lower 32-bit halves first, then
      each half into two 16-bit pieces.  On return *t0 holds the
      least significant 16 bits and *t3 the most significant.  Every
      output pointer must be non-NULL and must currently hold
      IRTemp_INVALID; fresh Ity_I16 temporaries are allocated here. */
   IRTemp upper = newTemp(Ity_I32);
   IRTemp lower = newTemp(Ity_I32);
   assign( upper, unop(Iop_64HIto32, mkexpr(t64)) );
   assign( lower, unop(Iop_64to32,   mkexpr(t64)) );

   vassert(t0 && *t0 == IRTemp_INVALID);
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);

   /* Lower half -> t0 (low half-word), t1 (high half-word). */
   *t0 = newTemp(Ity_I16);
   assign( *t0, unop(Iop_32to16,   mkexpr(lower)) );
   *t1 = newTemp(Ity_I16);
   assign( *t1, unop(Iop_32HIto16, mkexpr(lower)) );

   /* Upper half -> t2 (low half-word), t3 (high half-word). */
   *t2 = newTemp(Ity_I16);
   assign( *t2, unop(Iop_32to16,   mkexpr(upper)) );
   *t3 = newTemp(Ity_I16);
   assign( *t3, unop(Iop_32HIto16, mkexpr(upper)) );
}
7614
7615 /* Construct a 64-bit value from four 16-bit ints. */
7616
mk64from16s(IRTemp t3,IRTemp t2,IRTemp t1,IRTemp t0)7617 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
7618 IRTemp t1, IRTemp t0 )
7619 {
7620 return
7621 binop( Iop_32HLto64,
7622 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
7623 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
7624 );
7625 }
7626
7627 /* Generate IR to set the guest %EFLAGS from the pushfl-format image
7628 in the given 32-bit temporary. The flags that are set are: O S Z A
7629 C P D ID AC.
7630
7631 In all cases, code to set AC is generated. However, VEX actually
7632 ignores the AC value and so can optionally emit an emulation
7633 warning when it is enabled. In this routine, an emulation warning
7634 is only emitted if emit_AC_emwarn is True, in which case
7635 next_insn_EIP must be correct (this allows for correct code
7636 generation for popfl/popfw). If emit_AC_emwarn is False,
7637 next_insn_EIP is unimportant (this allows for easy if kludgey code
7638 generation for IRET.) */
7639
7640 static
set_EFLAGS_from_value(IRTemp t1,Bool emit_AC_emwarn,Addr32 next_insn_EIP)7641 void set_EFLAGS_from_value ( IRTemp t1,
7642 Bool emit_AC_emwarn,
7643 Addr32 next_insn_EIP )
7644 {
7645 vassert(typeOfIRTemp(irsb->tyenv,t1) == Ity_I32);
7646
7647 /* t1 is the flag word. Mask out everything except OSZACP and set
7648 the flags thunk to X86G_CC_OP_COPY. */
7649 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
7650 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
7651 stmt( IRStmt_Put( OFFB_CC_DEP1,
7652 binop(Iop_And32,
7653 mkexpr(t1),
7654 mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
7655 | X86G_CC_MASK_A | X86G_CC_MASK_Z
7656 | X86G_CC_MASK_S| X86G_CC_MASK_O )
7657 )
7658 )
7659 );
7660 /* Set NDEP even though it isn't used. This makes redundant-PUT
7661 elimination of previous stores to this field work better. */
7662 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
7663
7664 /* Also need to set the D flag, which is held in bit 10 of t1.
7665 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
7666 stmt( IRStmt_Put(
7667 OFFB_DFLAG,
7668 IRExpr_ITE(
7669 unop(Iop_32to1,
7670 binop(Iop_And32,
7671 binop(Iop_Shr32, mkexpr(t1), mkU8(10)),
7672 mkU32(1))),
7673 mkU32(0xFFFFFFFF),
7674 mkU32(1)))
7675 );
7676
7677 /* Set the ID flag */
7678 stmt( IRStmt_Put(
7679 OFFB_IDFLAG,
7680 IRExpr_ITE(
7681 unop(Iop_32to1,
7682 binop(Iop_And32,
7683 binop(Iop_Shr32, mkexpr(t1), mkU8(21)),
7684 mkU32(1))),
7685 mkU32(1),
7686 mkU32(0)))
7687 );
7688
7689 /* And set the AC flag. If setting it 1 to, possibly emit an
7690 emulation warning. */
7691 stmt( IRStmt_Put(
7692 OFFB_ACFLAG,
7693 IRExpr_ITE(
7694 unop(Iop_32to1,
7695 binop(Iop_And32,
7696 binop(Iop_Shr32, mkexpr(t1), mkU8(18)),
7697 mkU32(1))),
7698 mkU32(1),
7699 mkU32(0)))
7700 );
7701
7702 if (emit_AC_emwarn) {
7703 put_emwarn( mkU32(EmWarn_X86_acFlag) );
7704 stmt(
7705 IRStmt_Exit(
7706 binop( Iop_CmpNE32,
7707 binop(Iop_And32, mkexpr(t1), mkU32(1<<18)),
7708 mkU32(0) ),
7709 Ijk_EmWarn,
7710 IRConst_U32( next_insn_EIP ),
7711 OFFB_EIP
7712 )
7713 );
7714 }
7715 }
7716
7717
7718 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
7719 values (aa,bb), computes, for each of the 4 16-bit lanes:
7720
7721 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
7722 */
static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
{
   /* Each 64-bit input holds four 16-bit lanes.  The upper two and
      lower two lanes of each input are widened separately to 32x2
      form (interleave a value with itself, then arithmetic shift
      right by 16).  The two products are formed, shifted right by
      14, incremented, shifted right by 1, and finally the even
      16-bit lanes of the two results are concatenated. */
   IRTemp  aa      = newTemp(Ity_I64);
   IRTemp  bb      = newTemp(Ity_I64);
   IRTemp  aaHiW   = newTemp(Ity_I64);
   IRTemp  aaLoW   = newTemp(Ity_I64);
   IRTemp  bbHiW   = newTemp(Ity_I64);
   IRTemp  bbLoW   = newTemp(Ity_I64);
   IRTemp  resHi   = newTemp(Ity_I64);
   IRTemp  resLo   = newTemp(Ity_I64);
   IRTemp  ones    = newTemp(Ity_I64);
   IRExpr* prod;
   IRExpr* scaled;
   IRExpr* bumped;

   assign(aa, aax);
   assign(bb, bbx);

   /* Widen the 16-bit lanes to 32 bits. */
   assign( aaHiW,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
                 mkU8(16) ));
   assign( aaLoW,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
                 mkU8(16) ));
   assign( bbHiW,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
                 mkU8(16) ));
   assign( bbLoW,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
                 mkU8(16) ));

   /* The constant 1 in each of the two 32-bit lanes. */
   assign(ones, mkU64( (1ULL << 32) + 1 ));

   /* Upper pair of lanes: ((a * b) >>u 14) + 1) >>u 1. */
   prod   = binop(Iop_Mul32x2, mkexpr(aaHiW), mkexpr(bbHiW));
   scaled = binop(Iop_ShrN32x2, prod, mkU8(14));
   bumped = binop(Iop_Add32x2, scaled, mkexpr(ones));
   assign( resHi, binop(Iop_ShrN32x2, bumped, mkU8(1)) );

   /* Lower pair of lanes, same computation. */
   prod   = binop(Iop_Mul32x2, mkexpr(aaLoW), mkexpr(bbLoW));
   scaled = binop(Iop_ShrN32x2, prod, mkU8(14));
   bumped = binop(Iop_Add32x2, scaled, mkexpr(ones));
   assign( resLo, binop(Iop_ShrN32x2, bumped, mkU8(1)) );

   return binop(Iop_CatEvenLanes16x4, mkexpr(resHi), mkexpr(resLo));
}
7788
7789 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
7790 values (aa,bb), computes, for each lane:
7791
7792 if aa_lane < 0 then - bb_lane
7793 else if aa_lane > 0 then bb_lane
7794 else 0
7795 */
static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
{
   /* laneszB is the lane size in bytes and must be 1, 2 or 4.  Two
      per-lane masks are derived from aa by signed compares against
      zero; they select between bb and (0 - bb).  A lane for which
      neither mask is set yields zero. */
   IRTemp  aa        = newTemp(Ity_I64);
   IRTemp  bb        = newTemp(Ity_I64);
   IRTemp  zero      = newTemp(Ity_I64);
   IRTemp  bbNegated = newTemp(Ity_I64);
   IRTemp  aaIsNeg   = newTemp(Ity_I64);
   IRTemp  aaIsPos   = newTemp(Ity_I64);
   IROp    subOp     = Iop_INVALID;
   IROp    gtsOp     = Iop_INVALID;
   IRExpr* keepPos;
   IRExpr* keepNeg;

   if (laneszB == 1) {
      subOp = Iop_Sub8x8;  gtsOp = Iop_CmpGT8Sx8;
   } else if (laneszB == 2) {
      subOp = Iop_Sub16x4; gtsOp = Iop_CmpGT16Sx4;
   } else if (laneszB == 4) {
      subOp = Iop_Sub32x2; gtsOp = Iop_CmpGT32Sx2;
   } else {
      vassert(0);
   }

   assign( aa,        aax );
   assign( bb,        bbx );
   assign( zero,      mkU64(0) );
   assign( bbNegated, binop(subOp, mkexpr(zero), mkexpr(bb)) );
   assign( aaIsNeg,   binop(gtsOp, mkexpr(zero), mkexpr(aa)) );   /* 0 > aa */
   assign( aaIsPos,   binop(gtsOp, mkexpr(aa),   mkexpr(zero)) ); /* aa > 0 */

   keepPos = binop(Iop_And64, mkexpr(bb),        mkexpr(aaIsPos));
   keepNeg = binop(Iop_And64, mkexpr(bbNegated), mkexpr(aaIsNeg));
   return binop(Iop_Or64, keepPos, keepNeg);
}
7827
7828 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
7829 value aa, computes, for each lane
7830
7831 if aa < 0 then -aa else aa
7832
7833 Note that the result is interpreted as unsigned, so that the
7834 absolute value of the most negative signed input can be
7835 represented.
7836 */
static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB )
{
   /* laneszB is the lane size in bytes and must be 1, 2 or 4.  An
      arithmetic right shift by (lane bits - 1) yields the per-lane
      negative mask, and its complement the non-negative mask.  The
      result takes (0 - aa) in lanes selected by the negative mask
      and aa in the others. */
   IRTemp  aa        = newTemp(Ity_I64);
   IRTemp  zero      = newTemp(Ity_I64);
   IRTemp  aaNegated = newTemp(Ity_I64);
   IRTemp  isNeg     = newTemp(Ity_I64);
   IRTemp  isNonNeg  = newTemp(Ity_I64);
   IROp    subOp     = Iop_INVALID;
   IROp    sarOp     = Iop_INVALID;
   IRExpr* keepSame;
   IRExpr* keepFlipped;

   if (laneszB == 1) {
      subOp = Iop_Sub8x8;  sarOp = Iop_SarN8x8;
   } else if (laneszB == 2) {
      subOp = Iop_Sub16x4; sarOp = Iop_SarN16x4;
   } else if (laneszB == 4) {
      subOp = Iop_Sub32x2; sarOp = Iop_SarN32x2;
   } else {
      vassert(0);
   }

   assign( aa,        aax );
   assign( isNeg,     binop(sarOp, mkexpr(aa), mkU8(8*laneszB-1)) );
   assign( isNonNeg,  unop(Iop_Not64, mkexpr(isNeg)) );
   assign( zero,      mkU64(0) );
   assign( aaNegated, binop(subOp, mkexpr(zero), mkexpr(aa)) );

   keepSame    = binop(Iop_And64, mkexpr(aa),        mkexpr(isNonNeg));
   keepFlipped = binop(Iop_And64, mkexpr(aaNegated), mkexpr(isNeg));
   return binop(Iop_Or64, keepSame, keepFlipped);
}
7864
dis_PALIGNR_XMM_helper(IRTemp hi64,IRTemp lo64,Int byteShift)7865 static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
7866 IRTemp lo64, Int byteShift )
7867 {
7868 vassert(byteShift >= 1 && byteShift <= 7);
7869 return
7870 binop(Iop_Or64,
7871 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
7872 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
7873 );
7874 }
7875
7876 /* Generate a SIGSEGV followed by a restart of the current instruction
7877 if effective_addr is not 16-aligned. This is required behaviour
7878 for some SSE3 instructions and all 128-bit SSSE3 instructions.
7879 This assumes that guest_EIP_curr_instr is set correctly! */
static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr )
{
   /* Emit a side exit of kind Ijk_SigSEGV, targeting
      guest_EIP_curr_instr, which is taken when any of the low four
      bits of effective_addr is nonzero. */
   IRExpr* lowNibble  = binop(Iop_And32, mkexpr(effective_addr), mkU32(0xF));
   IRExpr* misaligned = binop(Iop_CmpNE32, lowNibble, mkU32(0));
   stmt( IRStmt_Exit( misaligned,
                      Ijk_SigSEGV,
                      IRConst_U32(guest_EIP_curr_instr),
                      OFFB_EIP ) );
}
7893
7894
7895 /* Helper for deciding whether a given insn (starting at the opcode
7896 byte) may validly be used with a LOCK prefix. The following insns
7897 may be used with LOCK when their destination operand is in memory.
7898 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
7899
7900 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
7901 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
7902 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
7903 SBB 80 /3, 81 /3, 82 /x, 83 /3, 18, 19
7904 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
7905 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
7906 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
7907
7908 DEC FE /1, FF /1
7909 INC FE /0, FF /0
7910
7911 NEG F6 /3, F7 /3
7912 NOT F6 /2, F7 /2
7913
7914 XCHG 86, 87
7915
7916 BTC 0F BB, 0F BA /7
7917 BTR 0F B3, 0F BA /6
7918 BTS 0F AB, 0F BA /5
7919
7920 CMPXCHG 0F B0, 0F B1
7921 CMPXCHG8B 0F C7 /1
7922
7923 XADD 0F C0, 0F C1
7924
7925 ------------------------------
7926
7927 80 /0 = addb $imm8, rm8
7928 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
7929 82 /0 = addb $imm8, rm8
7930 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
7931
7932 00 = addb r8, rm8
7933 01 = addl r32, rm32 and addw r16, rm16
7934
7935 Same for ADD OR ADC SBB AND SUB XOR
7936
7937 FE /1 = dec rm8
7938 FF /1 = dec rm32 and dec rm16
7939
7940 FE /0 = inc rm8
7941 FF /0 = inc rm32 and inc rm16
7942
7943 F6 /3 = neg rm8
7944 F7 /3 = neg rm32 and neg rm16
7945
7946 F6 /2 = not rm8
7947 F7 /2 = not rm32 and not rm16
7948
7949 0F BB = btcw r16, rm16 and btcl r32, rm32
7950 0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32
7951
7952 Same for BTS, BTR
7953 */
can_be_used_with_LOCK_prefix(const UChar * opc)7954 static Bool can_be_used_with_LOCK_prefix ( const UChar* opc )
7955 {
7956 switch (opc[0]) {
7957 case 0x00: case 0x01: case 0x08: case 0x09:
7958 case 0x10: case 0x11: case 0x18: case 0x19:
7959 case 0x20: case 0x21: case 0x28: case 0x29:
7960 case 0x30: case 0x31:
7961 if (!epartIsReg(opc[1]))
7962 return True;
7963 break;
7964
7965 case 0x80: case 0x81: case 0x82: case 0x83:
7966 if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6
7967 && !epartIsReg(opc[1]))
7968 return True;
7969 break;
7970
7971 case 0xFE: case 0xFF:
7972 if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1
7973 && !epartIsReg(opc[1]))
7974 return True;
7975 break;
7976
7977 case 0xF6: case 0xF7:
7978 if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3
7979 && !epartIsReg(opc[1]))
7980 return True;
7981 break;
7982
7983 case 0x86: case 0x87:
7984 if (!epartIsReg(opc[1]))
7985 return True;
7986 break;
7987
7988 case 0x0F: {
7989 switch (opc[1]) {
7990 case 0xBB: case 0xB3: case 0xAB:
7991 if (!epartIsReg(opc[2]))
7992 return True;
7993 break;
7994 case 0xBA:
7995 if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7
7996 && !epartIsReg(opc[2]))
7997 return True;
7998 break;
7999 case 0xB0: case 0xB1:
8000 if (!epartIsReg(opc[2]))
8001 return True;
8002 break;
8003 case 0xC7:
8004 if (gregOfRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
8005 return True;
8006 break;
8007 case 0xC0: case 0xC1:
8008 if (!epartIsReg(opc[2]))
8009 return True;
8010 break;
8011 default:
8012 break;
8013 } /* switch (opc[1]) */
8014 break;
8015 }
8016
8017 default:
8018 break;
8019 } /* switch (opc[0]) */
8020
8021 return False;
8022 }
8023
static IRTemp math_BSWAP ( IRTemp t1, IRType ty )
{
   /* Returns a new temporary of type 'ty' holding t1 with its bytes
      in reverse order.  Only Ity_I32 and Ity_I16 are supported; any
      other type asserts. */
   IRTemp swapped = newTemp(ty);

   if (ty == Ity_I16) {
      /* Two bytes: exchange them with a pair of 8-bit shifts. */
      IRExpr* loToHi = binop(Iop_Shl16, mkexpr(t1), mkU8(8));
      IRExpr* hiToLo = binop(Iop_Shr16, mkexpr(t1), mkU8(8));
      assign( swapped, binop(Iop_Or16, loToHi, hiToLo) );
      return swapped;
   }

   if (ty == Ity_I32) {
      /* Four bytes: move each one to its mirrored position.  The two
         middle bytes and the byte shifted down from the top are
         masked after shifting; the byte shifted up to the top needs
         no mask. */
      IRExpr* b0to3 = binop(Iop_Shl32, mkexpr(t1), mkU8(24));
      IRExpr* b1to2 = binop(Iop_And32,
                            binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
                            mkU32(0x00FF0000));
      IRExpr* b2to1 = binop(Iop_And32,
                            binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
                            mkU32(0x0000FF00));
      IRExpr* b3to0 = binop(Iop_And32,
                            binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
                            mkU32(0x000000FF) );
      assign( swapped,
              binop(Iop_Or32,
                    b0to3,
                    binop(Iop_Or32,
                          b1to2,
                          binop(Iop_Or32, b2to1, b3to0))) );
      return swapped;
   }

   vassert(0);
   /*NOTREACHED*/
   return IRTemp_INVALID;
}
8056
8057 /*------------------------------------------------------------*/
8058 /*--- Disassemble a single instruction ---*/
8059 /*------------------------------------------------------------*/
8060
8061 /* Disassemble a single instruction into IR. The instruction is
8062 located in host memory at &guest_code[delta]. *expect_CAS is set
8063 to True if the resulting IR is expected to contain an IRCAS
8064 statement, and False if it's not expected to. This makes it
8065 possible for the caller of disInstr_X86_WRK to check that
8066 LOCK-prefixed instructions are at least plausibly translated, in
8067 that it becomes possible to check that a (validly) LOCK-prefixed
8068 instruction generates a translation containing an IRCAS, and
8069 instructions without LOCK prefixes don't generate translations
8070 containing an IRCAS.
8071 */
8072 static
disInstr_X86_WRK(Bool * expect_CAS,Bool (* resteerOkFn)(void *,Addr),Bool resteerCisOk,void * callback_opaque,Long delta64,const VexArchInfo * archinfo,const VexAbiInfo * vbi,Bool sigill_diag)8073 DisResult disInstr_X86_WRK (
8074 /*OUT*/Bool* expect_CAS,
8075 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
8076 Bool resteerCisOk,
8077 void* callback_opaque,
8078 Long delta64,
8079 const VexArchInfo* archinfo,
8080 const VexAbiInfo* vbi,
8081 Bool sigill_diag
8082 )
8083 {
8084 IRType ty;
8085 IRTemp addr, t0, t1, t2, t3, t4, t5, t6;
8086 Int alen;
8087 UChar opc, modrm, abyte, pre;
8088 UInt d32;
8089 HChar dis_buf[50];
8090 Int am_sz, d_sz, n_prefixes;
8091 DisResult dres;
8092 const UChar* insn; /* used in SSE decoders */
8093
8094 /* The running delta */
8095 Int delta = (Int)delta64;
8096
8097 /* Holds eip at the start of the insn, so that we can print
8098 consistent error messages for unimplemented insns. */
8099 Int delta_start = delta;
8100
8101 /* sz denotes the nominal data-op size of the insn; we change it to
8102 2 if an 0x66 prefix is seen */
8103 Int sz = 4;
8104
8105 /* sorb holds the segment-override-prefix byte, if any. Zero if no
8106 prefix has been seen, else one of {0x26, 0x36, 0x3E, 0x64, 0x65}
8107 indicating the prefix. */
8108 UChar sorb = 0;
8109
8110 /* Gets set to True if a LOCK prefix is seen. */
8111 Bool pfx_lock = False;
8112
8113 /* Set result defaults. */
8114 dres.whatNext = Dis_Continue;
8115 dres.len = 0;
8116 dres.continueAt = 0;
8117 dres.hint = Dis_HintNone;
8118 dres.jk_StopHere = Ijk_INVALID;
8119
8120 *expect_CAS = False;
8121
8122 addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID;
8123
8124 vassert(guest_EIP_bbstart + delta == guest_EIP_curr_instr);
8125 DIP("\t0x%x: ", guest_EIP_bbstart+delta);
8126
8127 /* Spot "Special" instructions (see comment at top of file). */
8128 {
8129 const UChar* code = guest_code + delta;
8130 /* Spot the 12-byte preamble:
8131 C1C703 roll $3, %edi
8132 C1C70D roll $13, %edi
8133 C1C71D roll $29, %edi
8134 C1C713 roll $19, %edi
8135 */
8136 if (code[ 0] == 0xC1 && code[ 1] == 0xC7 && code[ 2] == 0x03 &&
8137 code[ 3] == 0xC1 && code[ 4] == 0xC7 && code[ 5] == 0x0D &&
8138 code[ 6] == 0xC1 && code[ 7] == 0xC7 && code[ 8] == 0x1D &&
8139 code[ 9] == 0xC1 && code[10] == 0xC7 && code[11] == 0x13) {
8140 /* Got a "Special" instruction preamble. Which one is it? */
8141 if (code[12] == 0x87 && code[13] == 0xDB /* xchgl %ebx,%ebx */) {
8142 /* %EDX = client_request ( %EAX ) */
8143 DIP("%%edx = client_request ( %%eax )\n");
8144 delta += 14;
8145 jmp_lit(&dres, Ijk_ClientReq, guest_EIP_bbstart+delta);
8146 vassert(dres.whatNext == Dis_StopHere);
8147 goto decode_success;
8148 }
8149 else
8150 if (code[12] == 0x87 && code[13] == 0xC9 /* xchgl %ecx,%ecx */) {
8151 /* %EAX = guest_NRADDR */
8152 DIP("%%eax = guest_NRADDR\n");
8153 delta += 14;
8154 putIReg(4, R_EAX, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
8155 goto decode_success;
8156 }
8157 else
8158 if (code[12] == 0x87 && code[13] == 0xD2 /* xchgl %edx,%edx */) {
8159 /* call-noredir *%EAX */
8160 DIP("call-noredir *%%eax\n");
8161 delta += 14;
8162 t1 = newTemp(Ity_I32);
8163 assign(t1, getIReg(4,R_EAX));
8164 t2 = newTemp(Ity_I32);
8165 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
8166 putIReg(4, R_ESP, mkexpr(t2));
8167 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta));
8168 jmp_treg(&dres, Ijk_NoRedir, t1);
8169 vassert(dres.whatNext == Dis_StopHere);
8170 goto decode_success;
8171 }
8172 else
8173 if (code[12] == 0x87 && code[13] == 0xFF /* xchgl %edi,%edi */) {
8174 /* IR injection */
8175 DIP("IR injection\n");
8176 vex_inject_ir(irsb, Iend_LE);
8177
8178 // Invalidate the current insn. The reason is that the IRop we're
8179 // injecting here can change. In which case the translation has to
8180 // be redone. For ease of handling, we simply invalidate all the
8181 // time.
8182 stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_EIP_curr_instr)));
8183 stmt(IRStmt_Put(OFFB_CMLEN, mkU32(14)));
8184
8185 delta += 14;
8186
8187 stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_bbstart + delta) ) );
8188 dres.whatNext = Dis_StopHere;
8189 dres.jk_StopHere = Ijk_InvalICache;
8190 goto decode_success;
8191 }
8192 /* We don't know what it is. */
8193 goto decode_failure;
8194 /*NOTREACHED*/
8195 }
8196 }
8197
8198 /* Handle a couple of weird-ass NOPs that have been observed in the
8199 wild. */
8200 {
8201 const UChar* code = guest_code + delta;
8202 /* Sun's JVM 1.5.0 uses the following as a NOP:
8203 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */
8204 if (code[0] == 0x26 && code[1] == 0x2E && code[2] == 0x64
8205 && code[3] == 0x65 && code[4] == 0x90) {
8206 DIP("%%es:%%cs:%%fs:%%gs:nop\n");
8207 delta += 5;
8208 goto decode_success;
8209 }
8210 /* Don't barf on recent binutils padding,
8211 all variants of which are: nopw %cs:0x0(%eax,%eax,1)
8212 66 2e 0f 1f 84 00 00 00 00 00
8213 66 66 2e 0f 1f 84 00 00 00 00 00
8214 66 66 66 2e 0f 1f 84 00 00 00 00 00
8215 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8216 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8217 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8218 */
8219 if (code[0] == 0x66) {
8220 Int data16_cnt;
8221 for (data16_cnt = 1; data16_cnt < 6; data16_cnt++)
8222 if (code[data16_cnt] != 0x66)
8223 break;
8224 if (code[data16_cnt] == 0x2E && code[data16_cnt + 1] == 0x0F
8225 && code[data16_cnt + 2] == 0x1F && code[data16_cnt + 3] == 0x84
8226 && code[data16_cnt + 4] == 0x00 && code[data16_cnt + 5] == 0x00
8227 && code[data16_cnt + 6] == 0x00 && code[data16_cnt + 7] == 0x00
8228 && code[data16_cnt + 8] == 0x00 ) {
8229 DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n");
8230 delta += 9 + data16_cnt;
8231 goto decode_success;
8232 }
8233 }
8234
8235 // Intel CET requires the following opcodes to be treated as NOPs
8236 // with any prefix and ModRM, SIB and disp combination:
8237 // "0F 19", "0F 1C", "0F 1D", "0F 1E", "0F 1F"
8238 UInt opcode_index = 0;
8239 // Skip any prefix combination
8240 UInt addr_override = 0;
8241 UInt temp_sz = 4;
8242 Bool is_prefix = True;
8243 while (is_prefix) {
8244 switch (code[opcode_index]) {
8245 case 0x66:
8246 temp_sz = 2;
8247 opcode_index++;
8248 break;
8249 case 0x67:
8250 addr_override = 1;
8251 opcode_index++;
8252 break;
8253 case 0x26: case 0x3E: // if we set segment override here,
8254 case 0x64: case 0x65: // disAMode segfaults
8255 case 0x2E: case 0x36:
8256 case 0xF0: case 0xF2: case 0xF3:
8257 opcode_index++;
8258 break;
8259 default:
8260 is_prefix = False;
8261 }
8262 }
8263 // Check the opcode
8264 if (code[opcode_index] == 0x0F) {
8265 switch (code[opcode_index+1]) {
8266 case 0x19:
8267 case 0x1C: case 0x1D:
8268 case 0x1E: case 0x1F:
8269 delta += opcode_index+2;
8270 modrm = getUChar(delta);
8271 if (epartIsReg(modrm)) {
8272 delta += 1;
8273 DIP("nop%c\n", nameISize(temp_sz));
8274 }
8275 else {
8276 addr = disAMode(&alen, 0/*"no sorb"*/, delta, dis_buf);
8277 delta += alen - addr_override;
8278 DIP("nop%c %s\n", nameISize(temp_sz), dis_buf);
8279 }
8280 goto decode_success;
8281 default:
8282 break;
8283 }
8284 }
8285 }
8286 /* Normal instruction handling starts here. */
8287
8288 /* Deal with some but not all prefixes:
8289 66(oso)
8290 F0(lock)
8291 2E(cs:) 3E(ds:) 26(es:) 64(fs:) 65(gs:) 36(ss:)
8292 Not dealt with (left in place):
8293 F2 F3
8294 */
8295 n_prefixes = 0;
8296 while (True) {
8297 if (n_prefixes > 7) goto decode_failure;
8298 pre = getUChar(delta);
8299 switch (pre) {
8300 case 0x66:
8301 sz = 2;
8302 break;
8303 case 0xF0:
8304 pfx_lock = True;
8305 *expect_CAS = True;
8306 break;
8307 case 0x3E: /* %DS: */
8308 case 0x26: /* %ES: */
8309 case 0x64: /* %FS: */
8310 case 0x65: /* %GS: */
8311 case 0x36: /* %SS: */
8312 if (sorb != 0)
8313 goto decode_failure; /* only one seg override allowed */
8314 sorb = pre;
8315 break;
8316 case 0x2E: { /* %CS: */
8317 /* 2E prefix on a conditional branch instruction is a
8318 branch-prediction hint, which can safely be ignored. */
8319 UChar op1 = getIByte(delta+1);
8320 UChar op2 = getIByte(delta+2);
8321 if ((op1 >= 0x70 && op1 <= 0x7F)
8322 || (op1 == 0xE3)
8323 || (op1 == 0x0F && op2 >= 0x80 && op2 <= 0x8F)) {
8324 if (0) vex_printf("vex x86->IR: ignoring branch hint\n");
8325 } else {
8326 /* All other CS override cases are not handled */
8327 goto decode_failure;
8328 }
8329 break;
8330 }
8331 default:
8332 goto not_a_prefix;
8333 }
8334 n_prefixes++;
8335 delta++;
8336 }
8337
8338 not_a_prefix:
8339
8340 /* Now we should be looking at the primary opcode byte or the
8341 leading F2 or F3. Check that any LOCK prefix is actually
8342 allowed. */
8343
8344 if (pfx_lock) {
8345 if (can_be_used_with_LOCK_prefix( &guest_code[delta] )) {
8346 DIP("lock ");
8347 } else {
8348 *expect_CAS = False;
8349 goto decode_failure;
8350 }
8351 }
8352
8353
8354 /* ---------------------------------------------------- */
8355 /* --- The SSE decoder. --- */
8356 /* ---------------------------------------------------- */
8357
8358 /* What did I do to deserve SSE ? Perhaps I was really bad in a
8359 previous life? */
8360
8361 /* Note, this doesn't handle SSE2 or SSE3. That is handled in a
8362 later section, further on. */
8363
8364 insn = &guest_code[delta];
8365
8366 /* Treat fxsave specially. It should be doable even on an SSE0
8367 (Pentium-II class) CPU. Hence be prepared to handle it on
8368 any subarchitecture variant.
8369 */
8370
8371 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
8372 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
8373 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 0) {
8374 IRDirty* d;
8375 modrm = getIByte(delta+2);
8376 vassert(sz == 4);
8377 vassert(!epartIsReg(modrm));
8378
8379 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8380 delta += 2+alen;
8381 gen_SEGV_if_not_16_aligned(addr);
8382
8383 DIP("fxsave %s\n", dis_buf);
8384
8385 /* Uses dirty helper:
8386 void x86g_dirtyhelper_FXSAVE ( VexGuestX86State*, UInt ) */
8387 d = unsafeIRDirty_0_N (
8388 0/*regparms*/,
8389 "x86g_dirtyhelper_FXSAVE",
8390 &x86g_dirtyhelper_FXSAVE,
8391 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
8392 );
8393
8394 /* declare we're writing memory */
8395 d->mFx = Ifx_Write;
8396 d->mAddr = mkexpr(addr);
8397 d->mSize = 464; /* according to recent Intel docs */
8398
8399 /* declare we're reading guest state */
8400 d->nFxState = 7;
8401 vex_bzero(&d->fxState, sizeof(d->fxState));
8402
8403 d->fxState[0].fx = Ifx_Read;
8404 d->fxState[0].offset = OFFB_FTOP;
8405 d->fxState[0].size = sizeof(UInt);
8406
8407 d->fxState[1].fx = Ifx_Read;
8408 d->fxState[1].offset = OFFB_FPREGS;
8409 d->fxState[1].size = 8 * sizeof(ULong);
8410
8411 d->fxState[2].fx = Ifx_Read;
8412 d->fxState[2].offset = OFFB_FPTAGS;
8413 d->fxState[2].size = 8 * sizeof(UChar);
8414
8415 d->fxState[3].fx = Ifx_Read;
8416 d->fxState[3].offset = OFFB_FPROUND;
8417 d->fxState[3].size = sizeof(UInt);
8418
8419 d->fxState[4].fx = Ifx_Read;
8420 d->fxState[4].offset = OFFB_FC3210;
8421 d->fxState[4].size = sizeof(UInt);
8422
8423 d->fxState[5].fx = Ifx_Read;
8424 d->fxState[5].offset = OFFB_XMM0;
8425 d->fxState[5].size = 8 * sizeof(U128);
8426
8427 d->fxState[6].fx = Ifx_Read;
8428 d->fxState[6].offset = OFFB_SSEROUND;
8429 d->fxState[6].size = sizeof(UInt);
8430
8431 /* Be paranoid ... this assertion tries to ensure the 8 %xmm
8432 images are packed back-to-back. If not, the value of
8433 d->fxState[5].size is wrong. */
8434 vassert(16 == sizeof(U128));
8435 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16));
8436
8437 stmt( IRStmt_Dirty(d) );
8438
8439 goto decode_success;
8440 }
8441
8442 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
8443 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
8444 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 1) {
8445 IRDirty* d;
8446 modrm = getIByte(delta+2);
8447 vassert(sz == 4);
8448 vassert(!epartIsReg(modrm));
8449
8450 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8451 delta += 2+alen;
8452 gen_SEGV_if_not_16_aligned(addr);
8453
8454 DIP("fxrstor %s\n", dis_buf);
8455
8456 /* Uses dirty helper:
8457 VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State*, UInt )
8458 NOTE:
8459 the VexEmNote value is simply ignored (unlike for FRSTOR)
8460 */
8461 d = unsafeIRDirty_0_N (
8462 0/*regparms*/,
8463 "x86g_dirtyhelper_FXRSTOR",
8464 &x86g_dirtyhelper_FXRSTOR,
8465 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
8466 );
8467
8468 /* declare we're reading memory */
8469 d->mFx = Ifx_Read;
8470 d->mAddr = mkexpr(addr);
8471 d->mSize = 464; /* according to recent Intel docs */
8472
8473 /* declare we're writing guest state */
8474 d->nFxState = 7;
8475 vex_bzero(&d->fxState, sizeof(d->fxState));
8476
8477 d->fxState[0].fx = Ifx_Write;
8478 d->fxState[0].offset = OFFB_FTOP;
8479 d->fxState[0].size = sizeof(UInt);
8480
8481 d->fxState[1].fx = Ifx_Write;
8482 d->fxState[1].offset = OFFB_FPREGS;
8483 d->fxState[1].size = 8 * sizeof(ULong);
8484
8485 d->fxState[2].fx = Ifx_Write;
8486 d->fxState[2].offset = OFFB_FPTAGS;
8487 d->fxState[2].size = 8 * sizeof(UChar);
8488
8489 d->fxState[3].fx = Ifx_Write;
8490 d->fxState[3].offset = OFFB_FPROUND;
8491 d->fxState[3].size = sizeof(UInt);
8492
8493 d->fxState[4].fx = Ifx_Write;
8494 d->fxState[4].offset = OFFB_FC3210;
8495 d->fxState[4].size = sizeof(UInt);
8496
8497 d->fxState[5].fx = Ifx_Write;
8498 d->fxState[5].offset = OFFB_XMM0;
8499 d->fxState[5].size = 8 * sizeof(U128);
8500
8501 d->fxState[6].fx = Ifx_Write;
8502 d->fxState[6].offset = OFFB_SSEROUND;
8503 d->fxState[6].size = sizeof(UInt);
8504
8505 /* Be paranoid ... this assertion tries to ensure the 8 %xmm
8506 images are packed back-to-back. If not, the value of
8507 d->fxState[5].size is wrong. */
8508 vassert(16 == sizeof(U128));
8509 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16));
8510
8511 stmt( IRStmt_Dirty(d) );
8512
8513 goto decode_success;
8514 }
8515
8516 /* ------ SSE decoder main ------ */
8517
8518 /* Skip parts of the decoder which don't apply given the stated
8519 guest subarchitecture. */
8520 if (archinfo->hwcaps == 0/*baseline, no sse at all*/)
8521 goto after_sse_decoders;
8522
8523 /* With mmxext only some extended MMX instructions are recognized.
8524 The mmxext instructions are MASKMOVQ MOVNTQ PAVGB PAVGW PMAXSW
8525 PMAXUB PMINSW PMINUB PMULHUW PSADBW PSHUFW PEXTRW PINSRW PMOVMSKB
8526 PREFETCHNTA PREFETCHT0 PREFETCHT1 PREFETCHT2 SFENCE
8527
8528 http://support.amd.com/us/Embedded_TechDocs/22466.pdf
8529 https://en.wikipedia.org/wiki/3DNow!#3DNow.21_extensions */
8530
8531 if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/)
8532 goto mmxext;
8533
8534 /* Otherwise we must be doing sse1 or sse2, so we can at least try
8535 for SSE1 here. */
8536
8537 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
8538 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x58) {
8539 delta = dis_SSE_E_to_G_all( sorb, delta+2, "addps", Iop_Add32Fx4 );
8540 goto decode_success;
8541 }
8542
8543 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
8544 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x58) {
8545 vassert(sz == 4);
8546 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "addss", Iop_Add32F0x4 );
8547 goto decode_success;
8548 }
8549
8550 /* 0F 55 = ANDNPS -- G = (not G) and E */
8551 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x55) {
8552 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnps", Iop_AndV128 );
8553 goto decode_success;
8554 }
8555
8556 /* 0F 54 = ANDPS -- G = G and E */
8557 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x54) {
8558 delta = dis_SSE_E_to_G_all( sorb, delta+2, "andps", Iop_AndV128 );
8559 goto decode_success;
8560 }
8561
8562 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
8563 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC2) {
8564 delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmpps", True, 4 );
8565 goto decode_success;
8566 }
8567
8568 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
8569 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xC2) {
8570 vassert(sz == 4);
8571 delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpss", False, 4 );
8572 goto decode_success;
8573 }
8574
8575 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
8576 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
8577 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
8578 IRTemp argL = newTemp(Ity_F32);
8579 IRTemp argR = newTemp(Ity_F32);
8580 modrm = getIByte(delta+2);
8581 if (epartIsReg(modrm)) {
8582 assign( argR, getXMMRegLane32F( eregOfRM(modrm), 0/*lowest lane*/ ) );
8583 delta += 2+1;
8584 DIP("[u]comiss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
8585 nameXMMReg(gregOfRM(modrm)) );
8586 } else {
8587 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8588 assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
8589 delta += 2+alen;
8590 DIP("[u]comiss %s,%s\n", dis_buf,
8591 nameXMMReg(gregOfRM(modrm)) );
8592 }
8593 assign( argL, getXMMRegLane32F( gregOfRM(modrm), 0/*lowest lane*/ ) );
8594
8595 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
8596 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
8597 stmt( IRStmt_Put(
8598 OFFB_CC_DEP1,
8599 binop( Iop_And32,
8600 binop(Iop_CmpF64,
8601 unop(Iop_F32toF64,mkexpr(argL)),
8602 unop(Iop_F32toF64,mkexpr(argR))),
8603 mkU32(0x45)
8604 )));
8605 /* Set NDEP even though it isn't used. This makes redundant-PUT
8606 elimination of previous stores to this field work better. */
8607 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
8608 goto decode_success;
8609 }
8610
8611 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
8612 half xmm */
8613 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x2A) {
8614 IRTemp arg64 = newTemp(Ity_I64);
8615 IRTemp rmode = newTemp(Ity_I32);
8616 vassert(sz == 4);
8617
8618 modrm = getIByte(delta+2);
8619 if (epartIsReg(modrm)) {
8620 /* Only switch to MMX mode if the source is an MMX register.
8621 See comments on CVTPI2PD for details. Fixes #357059. */
8622 do_MMX_preamble();
8623 assign( arg64, getMMXReg(eregOfRM(modrm)) );
8624 delta += 2+1;
8625 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregOfRM(modrm)),
8626 nameXMMReg(gregOfRM(modrm)));
8627 } else {
8628 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8629 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
8630 delta += 2+alen;
8631 DIP("cvtpi2ps %s,%s\n", dis_buf,
8632 nameXMMReg(gregOfRM(modrm)) );
8633 }
8634
8635 assign( rmode, get_sse_roundingmode() );
8636
8637 putXMMRegLane32F(
8638 gregOfRM(modrm), 0,
8639 binop(Iop_F64toF32,
8640 mkexpr(rmode),
8641 unop(Iop_I32StoF64,
8642 unop(Iop_64to32, mkexpr(arg64)) )) );
8643
8644 putXMMRegLane32F(
8645 gregOfRM(modrm), 1,
8646 binop(Iop_F64toF32,
8647 mkexpr(rmode),
8648 unop(Iop_I32StoF64,
8649 unop(Iop_64HIto32, mkexpr(arg64)) )) );
8650
8651 goto decode_success;
8652 }
8653
8654 /* F3 0F 2A = CVTSI2SS -- convert I32 in mem/ireg to F32 in low
8655 quarter xmm */
8656 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x2A) {
8657 IRTemp arg32 = newTemp(Ity_I32);
8658 IRTemp rmode = newTemp(Ity_I32);
8659 vassert(sz == 4);
8660
8661 modrm = getIByte(delta+3);
8662 if (epartIsReg(modrm)) {
8663 assign( arg32, getIReg(4, eregOfRM(modrm)) );
8664 delta += 3+1;
8665 DIP("cvtsi2ss %s,%s\n", nameIReg(4, eregOfRM(modrm)),
8666 nameXMMReg(gregOfRM(modrm)));
8667 } else {
8668 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
8669 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
8670 delta += 3+alen;
8671 DIP("cvtsi2ss %s,%s\n", dis_buf,
8672 nameXMMReg(gregOfRM(modrm)) );
8673 }
8674
8675 assign( rmode, get_sse_roundingmode() );
8676
8677 putXMMRegLane32F(
8678 gregOfRM(modrm), 0,
8679 binop(Iop_F64toF32,
8680 mkexpr(rmode),
8681 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
8682
8683 goto decode_success;
8684 }
8685
8686 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
8687 I32 in mmx, according to prevailing SSE rounding mode */
8688 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
8689 I32 in mmx, rounding towards zero */
8690 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
8691 IRTemp dst64 = newTemp(Ity_I64);
8692 IRTemp rmode = newTemp(Ity_I32);
8693 IRTemp f32lo = newTemp(Ity_F32);
8694 IRTemp f32hi = newTemp(Ity_F32);
8695 Bool r2zero = toBool(insn[1] == 0x2C);
8696
8697 do_MMX_preamble();
8698 modrm = getIByte(delta+2);
8699
8700 if (epartIsReg(modrm)) {
8701 delta += 2+1;
8702 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
8703 assign(f32hi, getXMMRegLane32F(eregOfRM(modrm), 1));
8704 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
8705 nameXMMReg(eregOfRM(modrm)),
8706 nameMMXReg(gregOfRM(modrm)));
8707 } else {
8708 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8709 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
8710 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add32,
8711 mkexpr(addr),
8712 mkU32(4) )));
8713 delta += 2+alen;
8714 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
8715 dis_buf,
8716 nameMMXReg(gregOfRM(modrm)));
8717 }
8718
8719 if (r2zero) {
8720 assign(rmode, mkU32((UInt)Irrm_ZERO) );
8721 } else {
8722 assign( rmode, get_sse_roundingmode() );
8723 }
8724
8725 assign(
8726 dst64,
8727 binop( Iop_32HLto64,
8728 binop( Iop_F64toI32S,
8729 mkexpr(rmode),
8730 unop( Iop_F32toF64, mkexpr(f32hi) ) ),
8731 binop( Iop_F64toI32S,
8732 mkexpr(rmode),
8733 unop( Iop_F32toF64, mkexpr(f32lo) ) )
8734 )
8735 );
8736
8737 putMMXReg(gregOfRM(modrm), mkexpr(dst64));
8738 goto decode_success;
8739 }
8740
8741 /* F3 0F 2D = CVTSS2SI -- convert F32 in mem/low quarter xmm to
8742 I32 in ireg, according to prevailing SSE rounding mode */
8743 /* F3 0F 2C = CVTTSS2SI -- convert F32 in mem/low quarter xmm to
8744 I32 in ireg, rounding towards zero */
8745 if (insn[0] == 0xF3 && insn[1] == 0x0F
8746 && (insn[2] == 0x2D || insn[2] == 0x2C)) {
8747 IRTemp rmode = newTemp(Ity_I32);
8748 IRTemp f32lo = newTemp(Ity_F32);
8749 Bool r2zero = toBool(insn[2] == 0x2C);
8750 vassert(sz == 4);
8751
8752 modrm = getIByte(delta+3);
8753 if (epartIsReg(modrm)) {
8754 delta += 3+1;
8755 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
8756 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
8757 nameXMMReg(eregOfRM(modrm)),
8758 nameIReg(4, gregOfRM(modrm)));
8759 } else {
8760 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
8761 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
8762 delta += 3+alen;
8763 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
8764 dis_buf,
8765 nameIReg(4, gregOfRM(modrm)));
8766 }
8767
8768 if (r2zero) {
8769 assign( rmode, mkU32((UInt)Irrm_ZERO) );
8770 } else {
8771 assign( rmode, get_sse_roundingmode() );
8772 }
8773
8774 putIReg(4, gregOfRM(modrm),
8775 binop( Iop_F64toI32S,
8776 mkexpr(rmode),
8777 unop( Iop_F32toF64, mkexpr(f32lo) ) )
8778 );
8779
8780 goto decode_success;
8781 }
8782
8783 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
8784 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5E) {
8785 delta = dis_SSE_E_to_G_all( sorb, delta+2, "divps", Iop_Div32Fx4 );
8786 goto decode_success;
8787 }
8788
8789 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
8790 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5E) {
8791 vassert(sz == 4);
8792 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "divss", Iop_Div32F0x4 );
8793 goto decode_success;
8794 }
8795
8796 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
8797 if (insn[0] == 0x0F && insn[1] == 0xAE
8798 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 2) {
8799
8800 IRTemp t64 = newTemp(Ity_I64);
8801 IRTemp ew = newTemp(Ity_I32);
8802
8803 modrm = getIByte(delta+2);
8804 vassert(!epartIsReg(modrm));
8805 vassert(sz == 4);
8806
8807 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8808 delta += 2+alen;
8809 DIP("ldmxcsr %s\n", dis_buf);
8810
8811 /* The only thing we observe in %mxcsr is the rounding mode.
8812 Therefore, pass the 32-bit value (SSE native-format control
8813 word) to a clean helper, getting back a 64-bit value, the
8814 lower half of which is the SSEROUND value to store, and the
8815 upper half of which is the emulation-warning token which may
8816 be generated.
8817 */
8818 /* ULong x86g_check_ldmxcsr ( UInt ); */
8819 assign( t64, mkIRExprCCall(
8820 Ity_I64, 0/*regparms*/,
8821 "x86g_check_ldmxcsr",
8822 &x86g_check_ldmxcsr,
8823 mkIRExprVec_1( loadLE(Ity_I32, mkexpr(addr)) )
8824 )
8825 );
8826
8827 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
8828 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
8829 put_emwarn( mkexpr(ew) );
8830 /* Finally, if an emulation warning was reported, side-exit to
8831 the next insn, reporting the warning, so that Valgrind's
8832 dispatcher sees the warning. */
8833 stmt(
8834 IRStmt_Exit(
8835 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
8836 Ijk_EmWarn,
8837 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
8838 OFFB_EIP
8839 )
8840 );
8841 goto decode_success;
8842 }
8843
8844
8845 /* mmxext sse1 subset starts here. mmxext only arches will parse
8846 only this subset of the sse1 instructions. */
8847 mmxext:
8848
8849 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8850 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
8851 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF7) {
8852 Bool ok = False;
8853 delta = dis_MMX( &ok, sorb, sz, delta+1 );
8854 if (!ok)
8855 goto decode_failure;
8856 goto decode_success;
8857 }
8858
8859 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8860 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
8861 Intel manual does not say anything about the usual business of
8862 the FP reg tags getting trashed whenever an MMX insn happens.
8863 So we just leave them alone.
8864 */
8865 if (insn[0] == 0x0F && insn[1] == 0xE7) {
8866 modrm = getIByte(delta+2);
8867 if (sz == 4 && !epartIsReg(modrm)) {
8868 /* do_MMX_preamble(); Intel docs don't specify this */
8869 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8870 storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
8871 DIP("movntq %s,%s\n", dis_buf,
8872 nameMMXReg(gregOfRM(modrm)));
8873 delta += 2+alen;
8874 goto decode_success;
8875 }
8876 /* else fall through */
8877 }
8878
8879 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8880 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
8881 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE0) {
8882 do_MMX_preamble();
8883 delta = dis_MMXop_regmem_to_reg (
8884 sorb, delta+2, insn[1], "pavgb", False );
8885 goto decode_success;
8886 }
8887
8888 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8889 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
8890 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE3) {
8891 do_MMX_preamble();
8892 delta = dis_MMXop_regmem_to_reg (
8893 sorb, delta+2, insn[1], "pavgw", False );
8894 goto decode_success;
8895 }
8896
8897 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8898 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
8899 zero-extend of it in ireg(G). */
8900 if (insn[0] == 0x0F && insn[1] == 0xC5) {
8901 modrm = insn[2];
8902 if (sz == 4 && epartIsReg(modrm)) {
8903 IRTemp sV = newTemp(Ity_I64);
8904 t5 = newTemp(Ity_I16);
8905 do_MMX_preamble();
8906 assign(sV, getMMXReg(eregOfRM(modrm)));
8907 breakup64to16s( sV, &t3, &t2, &t1, &t0 );
8908 switch (insn[3] & 3) {
8909 case 0: assign(t5, mkexpr(t0)); break;
8910 case 1: assign(t5, mkexpr(t1)); break;
8911 case 2: assign(t5, mkexpr(t2)); break;
8912 case 3: assign(t5, mkexpr(t3)); break;
8913 default: vassert(0); /*NOTREACHED*/
8914 }
8915 putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t5)));
8916 DIP("pextrw $%d,%s,%s\n",
8917 (Int)insn[3], nameMMXReg(eregOfRM(modrm)),
8918 nameIReg(4,gregOfRM(modrm)));
8919 delta += 4;
8920 goto decode_success;
8921 }
8922 /* else fall through */
8923 }
8924
8925 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8926 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
8927 put it into the specified lane of mmx(G). */
8928 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC4) {
8929 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
8930 mmx reg. t4 is the new lane value. t5 is the original
8931 mmx value. t6 is the new mmx value. */
8932 Int lane;
8933 t4 = newTemp(Ity_I16);
8934 t5 = newTemp(Ity_I64);
8935 t6 = newTemp(Ity_I64);
8936 modrm = insn[2];
8937 do_MMX_preamble();
8938
8939 assign(t5, getMMXReg(gregOfRM(modrm)));
8940 breakup64to16s( t5, &t3, &t2, &t1, &t0 );
8941
8942 if (epartIsReg(modrm)) {
8943 assign(t4, getIReg(2, eregOfRM(modrm)));
8944 delta += 3+1;
8945 lane = insn[3+1-1];
8946 DIP("pinsrw $%d,%s,%s\n", lane,
8947 nameIReg(2,eregOfRM(modrm)),
8948 nameMMXReg(gregOfRM(modrm)));
8949 } else {
8950 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
8951 delta += 3+alen;
8952 lane = insn[3+alen-1];
8953 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
8954 DIP("pinsrw $%d,%s,%s\n", lane,
8955 dis_buf,
8956 nameMMXReg(gregOfRM(modrm)));
8957 }
8958
8959 switch (lane & 3) {
8960 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
8961 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
8962 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
8963 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
8964 default: vassert(0); /*NOTREACHED*/
8965 }
8966 putMMXReg(gregOfRM(modrm), mkexpr(t6));
8967 goto decode_success;
8968 }
8969
8970 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8971 /* 0F EE = PMAXSW -- 16x4 signed max */
8972 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEE) {
8973 do_MMX_preamble();
8974 delta = dis_MMXop_regmem_to_reg (
8975 sorb, delta+2, insn[1], "pmaxsw", False );
8976 goto decode_success;
8977 }
8978
8979 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8980 /* 0F DE = PMAXUB -- 8x8 unsigned max */
8981 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDE) {
8982 do_MMX_preamble();
8983 delta = dis_MMXop_regmem_to_reg (
8984 sorb, delta+2, insn[1], "pmaxub", False );
8985 goto decode_success;
8986 }
8987
8988 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8989 /* 0F EA = PMINSW -- 16x4 signed min */
8990 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEA) {
8991 do_MMX_preamble();
8992 delta = dis_MMXop_regmem_to_reg (
8993 sorb, delta+2, insn[1], "pminsw", False );
8994 goto decode_success;
8995 }
8996
8997 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8998 /* 0F DA = PMINUB -- 8x8 unsigned min */
8999 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDA) {
9000 do_MMX_preamble();
9001 delta = dis_MMXop_regmem_to_reg (
9002 sorb, delta+2, insn[1], "pminub", False );
9003 goto decode_success;
9004 }
9005
9006 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9007 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
9008 mmx(E), turn them into a byte, and put zero-extend of it in
9009 ireg(G). */
9010 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD7) {
9011 modrm = insn[2];
9012 if (epartIsReg(modrm)) {
9013 do_MMX_preamble();
9014 t0 = newTemp(Ity_I64);
9015 t1 = newTemp(Ity_I32);
9016 assign(t0, getMMXReg(eregOfRM(modrm)));
9017 assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0))));
9018 putIReg(4, gregOfRM(modrm), mkexpr(t1));
9019 DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
9020 nameIReg(4,gregOfRM(modrm)));
9021 delta += 3;
9022 goto decode_success;
9023 }
9024 /* else fall through */
9025 }
9026
9027 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9028 /* 0F E4 = PMULHUW -- 16x4 hi-half of unsigned widening multiply */
9029 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE4) {
9030 do_MMX_preamble();
9031 delta = dis_MMXop_regmem_to_reg (
9032 sorb, delta+2, insn[1], "pmuluh", False );
9033 goto decode_success;
9034 }
9035
9036 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
9037 /* 0F 18 /1 = PREFETCHT0 -- with various different hints */
9038 /* 0F 18 /2 = PREFETCHT1 */
9039 /* 0F 18 /3 = PREFETCHT2 */
9040 if (insn[0] == 0x0F && insn[1] == 0x18
9041 && !epartIsReg(insn[2])
9042 && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 3) {
9043 const HChar* hintstr = "??";
9044
9045 modrm = getIByte(delta+2);
9046 vassert(!epartIsReg(modrm));
9047
9048 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9049 delta += 2+alen;
9050
9051 switch (gregOfRM(modrm)) {
9052 case 0: hintstr = "nta"; break;
9053 case 1: hintstr = "t0"; break;
9054 case 2: hintstr = "t1"; break;
9055 case 3: hintstr = "t2"; break;
9056 default: vassert(0); /*NOTREACHED*/
9057 }
9058
9059 DIP("prefetch%s %s\n", hintstr, dis_buf);
9060 goto decode_success;
9061 }
9062
9063 /* 0F 0D /0 = PREFETCH m8 -- 3DNow! prefetch */
9064 /* 0F 0D /1 = PREFETCHW m8 -- ditto, with some other hint */
9065 if (insn[0] == 0x0F && insn[1] == 0x0D
9066 && !epartIsReg(insn[2])
9067 && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 1) {
9068 const HChar* hintstr = "??";
9069
9070 modrm = getIByte(delta+2);
9071 vassert(!epartIsReg(modrm));
9072
9073 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9074 delta += 2+alen;
9075
9076 switch (gregOfRM(modrm)) {
9077 case 0: hintstr = ""; break;
9078 case 1: hintstr = "w"; break;
9079 default: vassert(0); /*NOTREACHED*/
9080 }
9081
9082 DIP("prefetch%s %s\n", hintstr, dis_buf);
9083 goto decode_success;
9084 }
9085
9086 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9087 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
9088 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF6) {
9089 do_MMX_preamble();
9090 delta = dis_MMXop_regmem_to_reg (
9091 sorb, delta+2, insn[1], "psadbw", False );
9092 goto decode_success;
9093 }
9094
9095 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9096 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
9097 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x70) {
9098 Int order;
9099 IRTemp sV, dV, s3, s2, s1, s0;
9100 s3 = s2 = s1 = s0 = IRTemp_INVALID;
9101 sV = newTemp(Ity_I64);
9102 dV = newTemp(Ity_I64);
9103 do_MMX_preamble();
9104 modrm = insn[2];
9105 if (epartIsReg(modrm)) {
9106 assign( sV, getMMXReg(eregOfRM(modrm)) );
9107 order = (Int)insn[3];
9108 delta += 2+2;
9109 DIP("pshufw $%d,%s,%s\n", order,
9110 nameMMXReg(eregOfRM(modrm)),
9111 nameMMXReg(gregOfRM(modrm)));
9112 } else {
9113 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9114 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
9115 order = (Int)insn[2+alen];
9116 delta += 3+alen;
9117 DIP("pshufw $%d,%s,%s\n", order,
9118 dis_buf,
9119 nameMMXReg(gregOfRM(modrm)));
9120 }
9121 breakup64to16s( sV, &s3, &s2, &s1, &s0 );
9122
9123 # define SEL(n) \
9124 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
9125 assign(dV,
9126 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
9127 SEL((order>>2)&3), SEL((order>>0)&3) )
9128 );
9129 putMMXReg(gregOfRM(modrm), mkexpr(dV));
9130 # undef SEL
9131 goto decode_success;
9132 }
9133
9134 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
9135 if (insn[0] == 0x0F && insn[1] == 0xAE
9136 && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {
9137 vassert(sz == 4);
9138 delta += 3;
9139 /* Insert a memory fence. It's sometimes important that these
9140 are carried through to the generated code. */
9141 stmt( IRStmt_MBE(Imbe_Fence) );
9142 DIP("sfence\n");
9143 goto decode_success;
9144 }
9145
9146 /* End of mmxext sse1 subset. No more sse parsing for mmxext only arches. */
9147 if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/)
9148 goto after_sse_decoders;
9149
9150
9151 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
9152 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) {
9153 delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 );
9154 goto decode_success;
9155 }
9156
9157 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
9158 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) {
9159 vassert(sz == 4);
9160 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 );
9161 goto decode_success;
9162 }
9163
9164 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
9165 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) {
9166 delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 );
9167 goto decode_success;
9168 }
9169
9170 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
9171 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) {
9172 vassert(sz == 4);
9173 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 );
9174 goto decode_success;
9175 }
9176
9177 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
9178 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
9179 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) {
9180 modrm = getIByte(delta+2);
9181 if (epartIsReg(modrm)) {
9182 putXMMReg( gregOfRM(modrm),
9183 getXMMReg( eregOfRM(modrm) ));
9184 DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9185 nameXMMReg(gregOfRM(modrm)));
9186 delta += 2+1;
9187 } else {
9188 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9189 if (insn[1] == 0x28/*movaps*/)
9190 gen_SEGV_if_not_16_aligned( addr );
9191 putXMMReg( gregOfRM(modrm),
9192 loadLE(Ity_V128, mkexpr(addr)) );
9193 DIP("mov[ua]ps %s,%s\n", dis_buf,
9194 nameXMMReg(gregOfRM(modrm)));
9195 delta += 2+alen;
9196 }
9197 goto decode_success;
9198 }
9199
9200 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
9201 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
9202 if (sz == 4 && insn[0] == 0x0F
9203 && (insn[1] == 0x29 || insn[1] == 0x11)) {
9204 modrm = getIByte(delta+2);
9205 if (epartIsReg(modrm)) {
9206 /* fall through; awaiting test case */
9207 } else {
9208 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9209 if (insn[1] == 0x29/*movaps*/)
9210 gen_SEGV_if_not_16_aligned( addr );
9211 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
9212 DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)),
9213 dis_buf );
9214 delta += 2+alen;
9215 goto decode_success;
9216 }
9217 }
9218
9219 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
9220 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
9221 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) {
9222 modrm = getIByte(delta+2);
9223 if (epartIsReg(modrm)) {
9224 delta += 2+1;
9225 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
9226 getXMMRegLane64( eregOfRM(modrm), 0 ) );
9227 DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9228 nameXMMReg(gregOfRM(modrm)));
9229 } else {
9230 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9231 delta += 2+alen;
9232 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
9233 loadLE(Ity_I64, mkexpr(addr)) );
9234 DIP("movhps %s,%s\n", dis_buf,
9235 nameXMMReg( gregOfRM(modrm) ));
9236 }
9237 goto decode_success;
9238 }
9239
9240 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
9241 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) {
9242 if (!epartIsReg(insn[2])) {
9243 delta += 2;
9244 addr = disAMode ( &alen, sorb, delta, dis_buf );
9245 delta += alen;
9246 storeLE( mkexpr(addr),
9247 getXMMRegLane64( gregOfRM(insn[2]),
9248 1/*upper lane*/ ) );
9249 DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
9250 dis_buf);
9251 goto decode_success;
9252 }
9253 /* else fall through */
9254 }
9255
9256 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
9257 /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
9258 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) {
9259 modrm = getIByte(delta+2);
9260 if (epartIsReg(modrm)) {
9261 delta += 2+1;
9262 putXMMRegLane64( gregOfRM(modrm),
9263 0/*lower lane*/,
9264 getXMMRegLane64( eregOfRM(modrm), 1 ));
9265 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)),
9266 nameXMMReg(gregOfRM(modrm)));
9267 } else {
9268 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9269 delta += 2+alen;
9270 putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
9271 loadLE(Ity_I64, mkexpr(addr)) );
9272 DIP("movlps %s, %s\n",
9273 dis_buf, nameXMMReg( gregOfRM(modrm) ));
9274 }
9275 goto decode_success;
9276 }
9277
9278 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
9279 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) {
9280 if (!epartIsReg(insn[2])) {
9281 delta += 2;
9282 addr = disAMode ( &alen, sorb, delta, dis_buf );
9283 delta += alen;
9284 storeLE( mkexpr(addr),
9285 getXMMRegLane64( gregOfRM(insn[2]),
9286 0/*lower lane*/ ) );
9287 DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
9288 dis_buf);
9289 goto decode_success;
9290 }
9291 /* else fall through */
9292 }
9293
   /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
      to 4 lowest bits of ireg(G) */
   if (insn[0] == 0x0F && insn[1] == 0x50) {
      modrm = getIByte(delta+2);
      /* Only the sz == 4, register-source form is decoded here; any
         other form leaves this block without being decoded. */
      if (sz == 4 && epartIsReg(modrm)) {
         Int src;
         t0 = newTemp(Ity_I32);
         t1 = newTemp(Ity_I32);
         t2 = newTemp(Ity_I32);
         t3 = newTemp(Ity_I32);
         delta += 2+1;
         src = eregOfRM(modrm);
         /* For 32-bit lane k of the source, shift right by (31-k) so
            that bit 31 of the lane lands on bit k, then mask with
            (1 << k) so that t<k> holds only that one bit. */
         assign( t0, binop( Iop_And32,
                            binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)),
                            mkU32(1) ));
         assign( t1, binop( Iop_And32,
                            binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)),
                            mkU32(2) ));
         assign( t2, binop( Iop_And32,
                            binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)),
                            mkU32(4) ));
         assign( t3, binop( Iop_And32,
                            binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)),
                            mkU32(8) ));
         /* OR the four single-bit values together and write the
            result to the 32-bit integer register named by G. */
         putIReg(4, gregOfRM(modrm),
                    binop(Iop_Or32,
                          binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
                          binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
                         )
                 );
         DIP("movmskps %s,%s\n", nameXMMReg(src),
                                 nameIReg(4, gregOfRM(modrm)));
         goto decode_success;
      }
      /* else fall through */
   }
9330
9331 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
9332 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
9333 if (insn[0] == 0x0F && insn[1] == 0x2B) {
9334 modrm = getIByte(delta+2);
9335 if (!epartIsReg(modrm)) {
9336 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9337 gen_SEGV_if_not_16_aligned( addr );
9338 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
9339 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
9340 dis_buf,
9341 nameXMMReg(gregOfRM(modrm)));
9342 delta += 2+alen;
9343 goto decode_success;
9344 }
9345 /* else fall through */
9346 }
9347
9348 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
9349 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
9350 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) {
9351 vassert(sz == 4);
9352 modrm = getIByte(delta+3);
9353 if (epartIsReg(modrm)) {
9354 putXMMRegLane32( gregOfRM(modrm), 0,
9355 getXMMRegLane32( eregOfRM(modrm), 0 ));
9356 DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9357 nameXMMReg(gregOfRM(modrm)));
9358 delta += 3+1;
9359 } else {
9360 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9361 /* zero bits 127:64 */
9362 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
9363 /* zero bits 63:32 */
9364 putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) );
9365 /* write bits 31:0 */
9366 putXMMRegLane32( gregOfRM(modrm), 0,
9367 loadLE(Ity_I32, mkexpr(addr)) );
9368 DIP("movss %s,%s\n", dis_buf,
9369 nameXMMReg(gregOfRM(modrm)));
9370 delta += 3+alen;
9371 }
9372 goto decode_success;
9373 }
9374
9375 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
9376 or lo 1/4 xmm). */
9377 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) {
9378 vassert(sz == 4);
9379 modrm = getIByte(delta+3);
9380 if (epartIsReg(modrm)) {
9381 /* fall through, we don't yet have a test case */
9382 } else {
9383 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9384 storeLE( mkexpr(addr),
9385 getXMMRegLane32(gregOfRM(modrm), 0) );
9386 DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)),
9387 dis_buf);
9388 delta += 3+alen;
9389 goto decode_success;
9390 }
9391 }
9392
9393 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
9394 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) {
9395 delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 );
9396 goto decode_success;
9397 }
9398
9399 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
9400 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) {
9401 vassert(sz == 4);
9402 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 );
9403 goto decode_success;
9404 }
9405
   /* 0F 56 = ORPS -- G = G or E */
9407 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) {
9408 delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 );
9409 goto decode_success;
9410 }
9411
9412 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
9413 if (insn[0] == 0x0F && insn[1] == 0x53) {
9414 vassert(sz == 4);
9415 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
9416 "rcpps", Iop_RecipEst32Fx4 );
9417 goto decode_success;
9418 }
9419
9420 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
9421 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x53) {
9422 vassert(sz == 4);
9423 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
9424 "rcpss", Iop_RecipEst32F0x4 );
9425 goto decode_success;
9426 }
9427
9428 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
9429 if (insn[0] == 0x0F && insn[1] == 0x52) {
9430 vassert(sz == 4);
9431 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
9432 "rsqrtps", Iop_RSqrtEst32Fx4 );
9433 goto decode_success;
9434 }
9435
9436 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
9437 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x52) {
9438 vassert(sz == 4);
9439 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
9440 "rsqrtss", Iop_RSqrtEst32F0x4 );
9441 goto decode_success;
9442 }
9443
   /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC6) {
      Int    select;   /* the imm8 lane-selector byte */
      IRTemp sV, dV;   /* whole 128-bit source (E) and destination (G) */
      IRTemp s3, s2, s1, s0, d3, d2, d1, d0;   /* their 32-bit pieces */
      sV = newTemp(Ity_V128);
      dV = newTemp(Ity_V128);
      s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
      modrm = insn[2];
      /* Capture the old value of G before it is overwritten below. */
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         /* Register form: the imm8 immediately follows the modrm byte. */
         select = (Int)insn[3];
         delta += 2+2;
         DIP("shufps $%d,%s,%s\n", select,
                                   nameXMMReg(eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         /* Memory form: the imm8 follows the alen bytes of addressing
            mode, which start at insn[2]. */
         select = (Int)insn[2+alen];
         delta += 3+alen;
         DIP("shufps $%d,%s,%s\n", select,
                                   dis_buf,
                                   nameXMMReg(gregOfRM(modrm)));
      }

      breakup128to32s( dV, &d3, &d2, &d1, &d0 );
      breakup128to32s( sV, &s3, &s2, &s1, &s0 );

      /* SELD/SELS map a 2-bit index to the corresponding 32-bit piece
         of the destination / source respectively. */
#     define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
#     define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))

      /* select[7:6] and select[5:4] pick from the source; select[3:2]
         and select[1:0] pick from the destination.
         NOTE(review): presumably mk128from32s takes its arguments
         most-significant lane first, mirroring breakup128to32s --
         confirm against its definition. */
      putXMMReg(
         gregOfRM(modrm),
         mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3),
                       SELD((select>>2)&3), SELD((select>>0)&3) )
      );

#     undef SELD
#     undef SELS

      goto decode_success;
   }
9489
   /* 0F 51 = SQRTPS -- sqrt 32Fx4 from R/M to R */
9491 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x51) {
9492 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
9493 "sqrtps", Iop_Sqrt32Fx4 );
9494 goto decode_success;
9495 }
9496
   /* F3 0F 51 = SQRTSS -- sqrt 32F0x4 from R/M to R */
9498 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x51) {
9499 vassert(sz == 4);
9500 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
9501 "sqrtss", Iop_Sqrt32F0x4 );
9502 goto decode_success;
9503 }
9504
   /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
   if (insn[0] == 0x0F && insn[1] == 0xAE
       && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 3) {
      /* The guard above has already established a memory E operand
         with /3 in the reg field; the asserts below re-check that and
         additionally require sz == 4. */
      modrm = getIByte(delta+2);
      vassert(sz == 4);
      vassert(!epartIsReg(modrm));

      addr = disAMode ( &alen, sorb, delta+2, dis_buf );
      delta += 2+alen;

      /* Fake up a native SSE mxcsr word.  The only thing it depends
         on is SSEROUND[1:0], so call a clean helper to cook it up.
      */
      /* UInt x86g_create_mxcsr ( UInt sseround ) */
      DIP("stmxcsr %s\n", dis_buf);
      /* Store the 32-bit value returned by the helper, which is
         passed the current SSE rounding mode as its only argument. */
      storeLE( mkexpr(addr),
               mkIRExprCCall(
                  Ity_I32, 0/*regp*/,
                  "x86g_create_mxcsr", &x86g_create_mxcsr,
                  mkIRExprVec_1( get_sse_roundingmode() )
               )
             );
      goto decode_success;
   }
9529
9530 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
9531 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5C) {
9532 delta = dis_SSE_E_to_G_all( sorb, delta+2, "subps", Iop_Sub32Fx4 );
9533 goto decode_success;
9534 }
9535
9536 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
9537 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5C) {
9538 vassert(sz == 4);
9539 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "subss", Iop_Sub32F0x4 );
9540 goto decode_success;
9541 }
9542
9543 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
9544 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
9545 /* These just appear to be special cases of SHUFPS */
9546 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
9547 IRTemp sV, dV;
9548 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
9549 Bool hi = toBool(insn[1] == 0x15);
9550 sV = newTemp(Ity_V128);
9551 dV = newTemp(Ity_V128);
9552 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
9553 modrm = insn[2];
9554 assign( dV, getXMMReg(gregOfRM(modrm)) );
9555
9556 if (epartIsReg(modrm)) {
9557 assign( sV, getXMMReg(eregOfRM(modrm)) );
9558 delta += 2+1;
9559 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
9560 nameXMMReg(eregOfRM(modrm)),
9561 nameXMMReg(gregOfRM(modrm)));
9562 } else {
9563 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9564 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
9565 delta += 2+alen;
9566 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
9567 dis_buf,
9568 nameXMMReg(gregOfRM(modrm)));
9569 }
9570
9571 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
9572 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
9573
9574 if (hi) {
9575 putXMMReg( gregOfRM(modrm), mk128from32s( s3, d3, s2, d2 ) );
9576 } else {
9577 putXMMReg( gregOfRM(modrm), mk128from32s( s1, d1, s0, d0 ) );
9578 }
9579
9580 goto decode_success;
9581 }
9582
   /* 0F 57 = XORPS -- G = G xor E */
9584 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x57) {
9585 delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorps", Iop_XorV128 );
9586 goto decode_success;
9587 }
9588
9589 /* ---------------------------------------------------- */
9590 /* --- end of the SSE decoder. --- */
9591 /* ---------------------------------------------------- */
9592
9593 /* ---------------------------------------------------- */
9594 /* --- start of the SSE2 decoder. --- */
9595 /* ---------------------------------------------------- */
9596
9597 /* Skip parts of the decoder which don't apply given the stated
9598 guest subarchitecture. */
9599 if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2))
9600 goto after_sse_decoders; /* no SSE2 capabilities */
9601
9602 insn = &guest_code[delta];
9603
   /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
9605 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x58) {
9606 delta = dis_SSE_E_to_G_all( sorb, delta+2, "addpd", Iop_Add64Fx2 );
9607 goto decode_success;
9608 }
9609
9610 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
9611 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x58) {
9612 vassert(sz == 4);
9613 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "addsd", Iop_Add64F0x2 );
9614 goto decode_success;
9615 }
9616
9617 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
9618 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x55) {
9619 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnpd", Iop_AndV128 );
9620 goto decode_success;
9621 }
9622
9623 /* 66 0F 54 = ANDPD -- G = G and E */
9624 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x54) {
9625 delta = dis_SSE_E_to_G_all( sorb, delta+2, "andpd", Iop_AndV128 );
9626 goto decode_success;
9627 }
9628
9629 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
9630 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC2) {
9631 delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmppd", True, 8 );
9632 goto decode_success;
9633 }
9634
9635 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
9636 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xC2) {
9637 vassert(sz == 4);
9638 delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpsd", False, 8 );
9639 goto decode_success;
9640 }
9641
   /* 66 0F 2F = COMISD  -- 64F0x2 comparison G,E, and set ZCP */
   /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
   /* Both opcodes are translated identically here; the trace output
      does not distinguish them either ("[u]comisd"). */
   if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
      IRTemp argL = newTemp(Ity_F64);   /* low F64 lane of xmm(G) */
      IRTemp argR = newTemp(Ity_F64);   /* low F64 lane of xmm(E), or m64 */
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         assign( argR, getXMMRegLane64F( eregOfRM(modrm), 0/*lowest lane*/ ) );
         delta += 2+1;
         DIP("[u]comisd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)) );
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
         delta += 2+alen;
         DIP("[u]comisd %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRM(modrm)) );
      }
      assign( argL, getXMMRegLane64F( gregOfRM(modrm), 0/*lowest lane*/ ) );

      /* Set the flags thunk to COPY mode, with DEP1 holding the
         comparison result masked by 0x45 (bits 0, 2 and 6).
         NOTE(review): presumably Iop_CmpF64's result encoding already
         places its outcome on the C, P and Z bit positions, so the
         mask just clears everything else -- confirm against the
         Iop_CmpF64 documentation. */
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put(
               OFFB_CC_DEP1,
               binop( Iop_And32,
                      binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)),
                      mkU32(0x45)
          )));
      /* Set NDEP even though it isn't used.  This makes redundant-PUT
         elimination of previous stores to this field work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
      goto decode_success;
   }
9675
9676 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
9677 F64 in xmm(G) */
9678 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xE6) {
9679 IRTemp arg64 = newTemp(Ity_I64);
9680 vassert(sz == 4);
9681
9682 modrm = getIByte(delta+3);
9683 if (epartIsReg(modrm)) {
9684 assign( arg64, getXMMRegLane64(eregOfRM(modrm), 0) );
9685 delta += 3+1;
9686 DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9687 nameXMMReg(gregOfRM(modrm)));
9688 } else {
9689 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9690 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
9691 delta += 3+alen;
9692 DIP("cvtdq2pd %s,%s\n", dis_buf,
9693 nameXMMReg(gregOfRM(modrm)) );
9694 }
9695
9696 putXMMRegLane64F(
9697 gregOfRM(modrm), 0,
9698 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
9699 );
9700
9701 putXMMRegLane64F(
9702 gregOfRM(modrm), 1,
9703 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
9704 );
9705
9706 goto decode_success;
9707 }
9708
9709 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
9710 xmm(G) */
9711 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5B) {
9712 IRTemp argV = newTemp(Ity_V128);
9713 IRTemp rmode = newTemp(Ity_I32);
9714
9715 modrm = getIByte(delta+2);
9716 if (epartIsReg(modrm)) {
9717 assign( argV, getXMMReg(eregOfRM(modrm)) );
9718 delta += 2+1;
9719 DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9720 nameXMMReg(gregOfRM(modrm)));
9721 } else {
9722 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9723 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9724 delta += 2+alen;
9725 DIP("cvtdq2ps %s,%s\n", dis_buf,
9726 nameXMMReg(gregOfRM(modrm)) );
9727 }
9728
9729 assign( rmode, get_sse_roundingmode() );
9730 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
9731
9732 # define CVT(_t) binop( Iop_F64toF32, \
9733 mkexpr(rmode), \
9734 unop(Iop_I32StoF64,mkexpr(_t)))
9735
9736 putXMMRegLane32F( gregOfRM(modrm), 3, CVT(t3) );
9737 putXMMRegLane32F( gregOfRM(modrm), 2, CVT(t2) );
9738 putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
9739 putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );
9740
9741 # undef CVT
9742
9743 goto decode_success;
9744 }
9745
9746 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
9747 lo half xmm(G), and zero upper half */
9748 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xE6) {
9749 IRTemp argV = newTemp(Ity_V128);
9750 IRTemp rmode = newTemp(Ity_I32);
9751 vassert(sz == 4);
9752
9753 modrm = getIByte(delta+3);
9754 if (epartIsReg(modrm)) {
9755 assign( argV, getXMMReg(eregOfRM(modrm)) );
9756 delta += 3+1;
9757 DIP("cvtpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9758 nameXMMReg(gregOfRM(modrm)));
9759 } else {
9760 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
9761 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9762 delta += 3+alen;
9763 DIP("cvtpd2dq %s,%s\n", dis_buf,
9764 nameXMMReg(gregOfRM(modrm)) );
9765 }
9766
9767 assign( rmode, get_sse_roundingmode() );
9768 t0 = newTemp(Ity_F64);
9769 t1 = newTemp(Ity_F64);
9770 assign( t0, unop(Iop_ReinterpI64asF64,
9771 unop(Iop_V128to64, mkexpr(argV))) );
9772 assign( t1, unop(Iop_ReinterpI64asF64,
9773 unop(Iop_V128HIto64, mkexpr(argV))) );
9774
9775 # define CVT(_t) binop( Iop_F64toI32S, \
9776 mkexpr(rmode), \
9777 mkexpr(_t) )
9778
9779 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
9780 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
9781 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
9782 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
9783
9784 # undef CVT
9785
9786 goto decode_success;
9787 }
9788
9789 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
9790 I32 in mmx, according to prevailing SSE rounding mode */
9791 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
9792 I32 in mmx, rounding towards zero */
9793 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
9794 IRTemp dst64 = newTemp(Ity_I64);
9795 IRTemp rmode = newTemp(Ity_I32);
9796 IRTemp f64lo = newTemp(Ity_F64);
9797 IRTemp f64hi = newTemp(Ity_F64);
9798 Bool r2zero = toBool(insn[1] == 0x2C);
9799
9800 do_MMX_preamble();
9801 modrm = getIByte(delta+2);
9802
9803 if (epartIsReg(modrm)) {
9804 delta += 2+1;
9805 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
9806 assign(f64hi, getXMMRegLane64F(eregOfRM(modrm), 1));
9807 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
9808 nameXMMReg(eregOfRM(modrm)),
9809 nameMMXReg(gregOfRM(modrm)));
9810 } else {
9811 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9812 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
9813 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add32,
9814 mkexpr(addr),
9815 mkU32(8) )));
9816 delta += 2+alen;
9817 DIP("cvt%spf2pi %s,%s\n", r2zero ? "t" : "",
9818 dis_buf,
9819 nameMMXReg(gregOfRM(modrm)));
9820 }
9821
9822 if (r2zero) {
9823 assign(rmode, mkU32((UInt)Irrm_ZERO) );
9824 } else {
9825 assign( rmode, get_sse_roundingmode() );
9826 }
9827
9828 assign(
9829 dst64,
9830 binop( Iop_32HLto64,
9831 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
9832 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
9833 )
9834 );
9835
9836 putMMXReg(gregOfRM(modrm), mkexpr(dst64));
9837 goto decode_success;
9838 }
9839
9840 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
9841 lo half xmm(G), and zero upper half */
9842 /* Note, this is practically identical to CVTPD2DQ. It would have
9843 been nicer to merge them together, but the insn[] offsets differ
9844 by one. */
9845 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5A) {
9846 IRTemp argV = newTemp(Ity_V128);
9847 IRTemp rmode = newTemp(Ity_I32);
9848
9849 modrm = getIByte(delta+2);
9850 if (epartIsReg(modrm)) {
9851 assign( argV, getXMMReg(eregOfRM(modrm)) );
9852 delta += 2+1;
9853 DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9854 nameXMMReg(gregOfRM(modrm)));
9855 } else {
9856 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9857 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9858 delta += 2+alen;
9859 DIP("cvtpd2ps %s,%s\n", dis_buf,
9860 nameXMMReg(gregOfRM(modrm)) );
9861 }
9862
9863 assign( rmode, get_sse_roundingmode() );
9864 t0 = newTemp(Ity_F64);
9865 t1 = newTemp(Ity_F64);
9866 assign( t0, unop(Iop_ReinterpI64asF64,
9867 unop(Iop_V128to64, mkexpr(argV))) );
9868 assign( t1, unop(Iop_ReinterpI64asF64,
9869 unop(Iop_V128HIto64, mkexpr(argV))) );
9870
9871 # define CVT(_t) binop( Iop_F64toF32, \
9872 mkexpr(rmode), \
9873 mkexpr(_t) )
9874
9875 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
9876 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
9877 putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
9878 putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );
9879
9880 # undef CVT
9881
9882 goto decode_success;
9883 }
9884
9885 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
9886 xmm(G) */
9887 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x2A) {
9888 IRTemp arg64 = newTemp(Ity_I64);
9889
9890 modrm = getIByte(delta+2);
9891 if (epartIsReg(modrm)) {
9892 /* Only switch to MMX mode if the source is a MMX register.
9893 This is inconsistent with all other instructions which
9894 convert between XMM and (M64 or MMX), which always switch
9895 to MMX mode even if 64-bit operand is M64 and not MMX. At
9896 least, that's what the Intel docs seem to me to say.
9897 Fixes #210264. */
9898 do_MMX_preamble();
9899 assign( arg64, getMMXReg(eregOfRM(modrm)) );
9900 delta += 2+1;
9901 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregOfRM(modrm)),
9902 nameXMMReg(gregOfRM(modrm)));
9903 } else {
9904 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9905 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
9906 delta += 2+alen;
9907 DIP("cvtpi2pd %s,%s\n", dis_buf,
9908 nameXMMReg(gregOfRM(modrm)) );
9909 }
9910
9911 putXMMRegLane64F(
9912 gregOfRM(modrm), 0,
9913 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
9914 );
9915
9916 putXMMRegLane64F(
9917 gregOfRM(modrm), 1,
9918 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
9919 );
9920
9921 goto decode_success;
9922 }
9923
9924 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
9925 xmm(G) */
9926 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5B) {
9927 IRTemp argV = newTemp(Ity_V128);
9928 IRTemp rmode = newTemp(Ity_I32);
9929
9930 modrm = getIByte(delta+2);
9931 if (epartIsReg(modrm)) {
9932 assign( argV, getXMMReg(eregOfRM(modrm)) );
9933 delta += 2+1;
9934 DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9935 nameXMMReg(gregOfRM(modrm)));
9936 } else {
9937 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9938 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9939 delta += 2+alen;
9940 DIP("cvtps2dq %s,%s\n", dis_buf,
9941 nameXMMReg(gregOfRM(modrm)) );
9942 }
9943
9944 assign( rmode, get_sse_roundingmode() );
9945 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
9946
9947 /* This is less than ideal. If it turns out to be a performance
9948 bottleneck it can be improved. */
9949 # define CVT(_t) \
9950 binop( Iop_F64toI32S, \
9951 mkexpr(rmode), \
9952 unop( Iop_F32toF64, \
9953 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
9954
9955 putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
9956 putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
9957 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
9958 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
9959
9960 # undef CVT
9961
9962 goto decode_success;
9963 }
9964
9965 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
9966 F64 in xmm(G). */
9967 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5A) {
9968 IRTemp f32lo = newTemp(Ity_F32);
9969 IRTemp f32hi = newTemp(Ity_F32);
9970
9971 modrm = getIByte(delta+2);
9972 if (epartIsReg(modrm)) {
9973 assign( f32lo, getXMMRegLane32F(eregOfRM(modrm), 0) );
9974 assign( f32hi, getXMMRegLane32F(eregOfRM(modrm), 1) );
9975 delta += 2+1;
9976 DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
9977 nameXMMReg(gregOfRM(modrm)));
9978 } else {
9979 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
9980 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
9981 assign( f32hi, loadLE(Ity_F32,
9982 binop(Iop_Add32,mkexpr(addr),mkU32(4))) );
9983 delta += 2+alen;
9984 DIP("cvtps2pd %s,%s\n", dis_buf,
9985 nameXMMReg(gregOfRM(modrm)) );
9986 }
9987
9988 putXMMRegLane64F( gregOfRM(modrm), 1,
9989 unop(Iop_F32toF64, mkexpr(f32hi)) );
9990 putXMMRegLane64F( gregOfRM(modrm), 0,
9991 unop(Iop_F32toF64, mkexpr(f32lo)) );
9992
9993 goto decode_success;
9994 }
9995
9996 /* F2 0F 2D = CVTSD2SI -- convert F64 in mem/low half xmm to
9997 I32 in ireg, according to prevailing SSE rounding mode */
9998 /* F2 0F 2C = CVTTSD2SI -- convert F64 in mem/low half xmm to
9999 I32 in ireg, rounding towards zero */
10000 if (insn[0] == 0xF2 && insn[1] == 0x0F
10001 && (insn[2] == 0x2D || insn[2] == 0x2C)) {
10002 IRTemp rmode = newTemp(Ity_I32);
10003 IRTemp f64lo = newTemp(Ity_F64);
10004 Bool r2zero = toBool(insn[2] == 0x2C);
10005 vassert(sz == 4);
10006
10007 modrm = getIByte(delta+3);
10008 if (epartIsReg(modrm)) {
10009 delta += 3+1;
10010 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
10011 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
10012 nameXMMReg(eregOfRM(modrm)),
10013 nameIReg(4, gregOfRM(modrm)));
10014 } else {
10015 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10016 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
10017 delta += 3+alen;
10018 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
10019 dis_buf,
10020 nameIReg(4, gregOfRM(modrm)));
10021 }
10022
10023 if (r2zero) {
10024 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10025 } else {
10026 assign( rmode, get_sse_roundingmode() );
10027 }
10028
10029 putIReg(4, gregOfRM(modrm),
10030 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
10031
10032 goto decode_success;
10033 }
10034
10035 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
10036 low 1/4 xmm(G), according to prevailing SSE rounding mode */
10037 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5A) {
10038 IRTemp rmode = newTemp(Ity_I32);
10039 IRTemp f64lo = newTemp(Ity_F64);
10040 vassert(sz == 4);
10041
10042 modrm = getIByte(delta+3);
10043 if (epartIsReg(modrm)) {
10044 delta += 3+1;
10045 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
10046 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10047 nameXMMReg(gregOfRM(modrm)));
10048 } else {
10049 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10050 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
10051 delta += 3+alen;
10052 DIP("cvtsd2ss %s,%s\n", dis_buf,
10053 nameXMMReg(gregOfRM(modrm)));
10054 }
10055
10056 assign( rmode, get_sse_roundingmode() );
10057 putXMMRegLane32F(
10058 gregOfRM(modrm), 0,
10059 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
10060 );
10061
10062 goto decode_success;
10063 }
10064
10065 /* F2 0F 2A = CVTSI2SD -- convert I32 in mem/ireg to F64 in low
10066 half xmm */
10067 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x2A) {
10068 IRTemp arg32 = newTemp(Ity_I32);
10069 vassert(sz == 4);
10070
10071 modrm = getIByte(delta+3);
10072 if (epartIsReg(modrm)) {
10073 assign( arg32, getIReg(4, eregOfRM(modrm)) );
10074 delta += 3+1;
10075 DIP("cvtsi2sd %s,%s\n", nameIReg(4, eregOfRM(modrm)),
10076 nameXMMReg(gregOfRM(modrm)));
10077 } else {
10078 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10079 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
10080 delta += 3+alen;
10081 DIP("cvtsi2sd %s,%s\n", dis_buf,
10082 nameXMMReg(gregOfRM(modrm)) );
10083 }
10084
10085 putXMMRegLane64F(
10086 gregOfRM(modrm), 0,
10087 unop(Iop_I32StoF64, mkexpr(arg32)) );
10088
10089 goto decode_success;
10090 }
10091
10092 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
10093 low half xmm(G) */
10094 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5A) {
10095 IRTemp f32lo = newTemp(Ity_F32);
10096 vassert(sz == 4);
10097
10098 modrm = getIByte(delta+3);
10099 if (epartIsReg(modrm)) {
10100 delta += 3+1;
10101 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
10102 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10103 nameXMMReg(gregOfRM(modrm)));
10104 } else {
10105 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10106 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
10107 delta += 3+alen;
10108 DIP("cvtss2sd %s,%s\n", dis_buf,
10109 nameXMMReg(gregOfRM(modrm)));
10110 }
10111
10112 putXMMRegLane64F( gregOfRM(modrm), 0,
10113 unop( Iop_F32toF64, mkexpr(f32lo) ) );
10114
10115 goto decode_success;
10116 }
10117
10118 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
10119 lo half xmm(G), and zero upper half, rounding towards zero */
10120 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE6) {
10121 IRTemp argV = newTemp(Ity_V128);
10122 IRTemp rmode = newTemp(Ity_I32);
10123
10124 modrm = getIByte(delta+2);
10125 if (epartIsReg(modrm)) {
10126 assign( argV, getXMMReg(eregOfRM(modrm)) );
10127 delta += 2+1;
10128 DIP("cvttpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10129 nameXMMReg(gregOfRM(modrm)));
10130 } else {
10131 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10132 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10133 delta += 2+alen;
10134 DIP("cvttpd2dq %s,%s\n", dis_buf,
10135 nameXMMReg(gregOfRM(modrm)) );
10136 }
10137
10138 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10139
10140 t0 = newTemp(Ity_F64);
10141 t1 = newTemp(Ity_F64);
10142 assign( t0, unop(Iop_ReinterpI64asF64,
10143 unop(Iop_V128to64, mkexpr(argV))) );
10144 assign( t1, unop(Iop_ReinterpI64asF64,
10145 unop(Iop_V128HIto64, mkexpr(argV))) );
10146
10147 # define CVT(_t) binop( Iop_F64toI32S, \
10148 mkexpr(rmode), \
10149 mkexpr(_t) )
10150
10151 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
10152 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
10153 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
10154 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
10155
10156 # undef CVT
10157
10158 goto decode_success;
10159 }
10160
10161 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
10162 xmm(G), rounding towards zero */
10163 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5B) {
10164 IRTemp argV = newTemp(Ity_V128);
10165 IRTemp rmode = newTemp(Ity_I32);
10166 vassert(sz == 4);
10167
10168 modrm = getIByte(delta+3);
10169 if (epartIsReg(modrm)) {
10170 assign( argV, getXMMReg(eregOfRM(modrm)) );
10171 delta += 3+1;
10172 DIP("cvttps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10173 nameXMMReg(gregOfRM(modrm)));
10174 } else {
10175 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10176 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10177 delta += 3+alen;
10178 DIP("cvttps2dq %s,%s\n", dis_buf,
10179 nameXMMReg(gregOfRM(modrm)) );
10180 }
10181
10182 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10183 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
10184
10185 /* This is less than ideal. If it turns out to be a performance
10186 bottleneck it can be improved. */
10187 # define CVT(_t) \
10188 binop( Iop_F64toI32S, \
10189 mkexpr(rmode), \
10190 unop( Iop_F32toF64, \
10191 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
10192
10193 putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
10194 putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
10195 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
10196 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
10197
10198 # undef CVT
10199
10200 goto decode_success;
10201 }
10202
10203 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
10204 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5E) {
10205 delta = dis_SSE_E_to_G_all( sorb, delta+2, "divpd", Iop_Div64Fx2 );
10206 goto decode_success;
10207 }
10208
10209 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
10210 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5E) {
10211 vassert(sz == 4);
10212 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "divsd", Iop_Div64F0x2 );
10213 goto decode_success;
10214 }
10215
10216 /* 0F AE /5 = LFENCE -- flush pending operations to memory */
10217 /* 0F AE /6 = MFENCE -- flush pending operations to memory */
10218 if (insn[0] == 0x0F && insn[1] == 0xAE
10219 && epartIsReg(insn[2])
10220 && (gregOfRM(insn[2]) == 5 || gregOfRM(insn[2]) == 6)) {
10221 vassert(sz == 4);
10222 delta += 3;
10223 /* Insert a memory fence. It's sometimes important that these
10224 are carried through to the generated code. */
10225 stmt( IRStmt_MBE(Imbe_Fence) );
10226 DIP("%sfence\n", gregOfRM(insn[2])==5 ? "l" : "m");
10227 goto decode_success;
10228 }
10229
10230 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
10231 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5F) {
10232 delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxpd", Iop_Max64Fx2 );
10233 goto decode_success;
10234 }
10235
10236 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
10237 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5F) {
10238 vassert(sz == 4);
10239 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "maxsd", Iop_Max64F0x2 );
10240 goto decode_success;
10241 }
10242
10243 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
10244 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5D) {
10245 delta = dis_SSE_E_to_G_all( sorb, delta+2, "minpd", Iop_Min64Fx2 );
10246 goto decode_success;
10247 }
10248
10249 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
10250 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5D) {
10251 vassert(sz == 4);
10252 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "minsd", Iop_Min64F0x2 );
10253 goto decode_success;
10254 }
10255
10256 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
10257 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
10258 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
10259 if (sz == 2 && insn[0] == 0x0F
10260 && (insn[1] == 0x28 || insn[1] == 0x10 || insn[1] == 0x6F)) {
10261 const HChar* wot = insn[1]==0x28 ? "apd" :
10262 insn[1]==0x10 ? "upd" : "dqa";
10263 modrm = getIByte(delta+2);
10264 if (epartIsReg(modrm)) {
10265 putXMMReg( gregOfRM(modrm),
10266 getXMMReg( eregOfRM(modrm) ));
10267 DIP("mov%s %s,%s\n", wot, nameXMMReg(eregOfRM(modrm)),
10268 nameXMMReg(gregOfRM(modrm)));
10269 delta += 2+1;
10270 } else {
10271 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10272 if (insn[1] == 0x28/*movapd*/ || insn[1] == 0x6F/*movdqa*/)
10273 gen_SEGV_if_not_16_aligned( addr );
10274 putXMMReg( gregOfRM(modrm),
10275 loadLE(Ity_V128, mkexpr(addr)) );
10276 DIP("mov%s %s,%s\n", wot, dis_buf,
10277 nameXMMReg(gregOfRM(modrm)));
10278 delta += 2+alen;
10279 }
10280 goto decode_success;
10281 }
10282
10283 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
10284 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
10285 if (sz == 2 && insn[0] == 0x0F
10286 && (insn[1] == 0x29 || insn[1] == 0x11)) {
10287 const HChar* wot = insn[1]==0x29 ? "apd" : "upd";
10288 modrm = getIByte(delta+2);
10289 if (epartIsReg(modrm)) {
10290 /* fall through; awaiting test case */
10291 } else {
10292 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10293 if (insn[1] == 0x29/*movapd*/)
10294 gen_SEGV_if_not_16_aligned( addr );
10295 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
10296 DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRM(modrm)),
10297 dis_buf );
10298 delta += 2+alen;
10299 goto decode_success;
10300 }
10301 }
10302
10303 /* 66 0F 6E = MOVD from r/m32 to xmm, zeroing high 3/4 of xmm. */
10304 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6E) {
10305 modrm = getIByte(delta+2);
10306 if (epartIsReg(modrm)) {
10307 delta += 2+1;
10308 putXMMReg(
10309 gregOfRM(modrm),
10310 unop( Iop_32UtoV128, getIReg(4, eregOfRM(modrm)) )
10311 );
10312 DIP("movd %s, %s\n",
10313 nameIReg(4,eregOfRM(modrm)), nameXMMReg(gregOfRM(modrm)));
10314 } else {
10315 addr = disAMode( &alen, sorb, delta+2, dis_buf );
10316 delta += 2+alen;
10317 putXMMReg(
10318 gregOfRM(modrm),
10319 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
10320 );
10321 DIP("movd %s, %s\n", dis_buf, nameXMMReg(gregOfRM(modrm)));
10322 }
10323 goto decode_success;
10324 }
10325
10326 /* 66 0F 7E = MOVD from xmm low 1/4 to r/m32. */
10327 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7E) {
10328 modrm = getIByte(delta+2);
10329 if (epartIsReg(modrm)) {
10330 delta += 2+1;
10331 putIReg( 4, eregOfRM(modrm),
10332 getXMMRegLane32(gregOfRM(modrm), 0) );
10333 DIP("movd %s, %s\n",
10334 nameXMMReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm)));
10335 } else {
10336 addr = disAMode( &alen, sorb, delta+2, dis_buf );
10337 delta += 2+alen;
10338 storeLE( mkexpr(addr),
10339 getXMMRegLane32(gregOfRM(modrm), 0) );
10340 DIP("movd %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
10341 }
10342 goto decode_success;
10343 }
10344
10345 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
10346 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7F) {
10347 modrm = getIByte(delta+2);
10348 if (epartIsReg(modrm)) {
10349 delta += 2+1;
10350 putXMMReg( eregOfRM(modrm),
10351 getXMMReg(gregOfRM(modrm)) );
10352 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)),
10353 nameXMMReg(eregOfRM(modrm)));
10354 } else {
10355 addr = disAMode( &alen, sorb, delta+2, dis_buf );
10356 delta += 2+alen;
10357 gen_SEGV_if_not_16_aligned( addr );
10358 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
10359 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
10360 }
10361 goto decode_success;
10362 }
10363
10364 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
10365 /* Unfortunately can't simply use the MOVDQA case since the
10366 prefix lengths are different (66 vs F3) */
10367 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x6F) {
10368 vassert(sz == 4);
10369 modrm = getIByte(delta+3);
10370 if (epartIsReg(modrm)) {
10371 putXMMReg( gregOfRM(modrm),
10372 getXMMReg( eregOfRM(modrm) ));
10373 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10374 nameXMMReg(gregOfRM(modrm)));
10375 delta += 3+1;
10376 } else {
10377 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10378 putXMMReg( gregOfRM(modrm),
10379 loadLE(Ity_V128, mkexpr(addr)) );
10380 DIP("movdqu %s,%s\n", dis_buf,
10381 nameXMMReg(gregOfRM(modrm)));
10382 delta += 3+alen;
10383 }
10384 goto decode_success;
10385 }
10386
10387 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
10388 /* Unfortunately can't simply use the MOVDQA case since the
10389 prefix lengths are different (66 vs F3) */
10390 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7F) {
10391 vassert(sz == 4);
10392 modrm = getIByte(delta+3);
10393 if (epartIsReg(modrm)) {
10394 delta += 3+1;
10395 putXMMReg( eregOfRM(modrm),
10396 getXMMReg(gregOfRM(modrm)) );
10397 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)),
10398 nameXMMReg(eregOfRM(modrm)));
10399 } else {
10400 addr = disAMode( &alen, sorb, delta+3, dis_buf );
10401 delta += 3+alen;
10402 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
10403 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
10404 }
10405 goto decode_success;
10406 }
10407
10408 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
10409 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD6) {
10410 vassert(sz == 4);
10411 modrm = getIByte(delta+3);
10412 if (epartIsReg(modrm)) {
10413 do_MMX_preamble();
10414 putMMXReg( gregOfRM(modrm),
10415 getXMMRegLane64( eregOfRM(modrm), 0 ));
10416 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10417 nameMMXReg(gregOfRM(modrm)));
10418 delta += 3+1;
10419 goto decode_success;
10420 } else {
10421 /* fall through, apparently no mem case for this insn */
10422 }
10423 }
10424
10425 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
/* This seems identical to MOVHPS.  This instruction encoding is
   completely crazy. */
10428 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x16) {
10429 modrm = getIByte(delta+2);
10430 if (epartIsReg(modrm)) {
10431 /* fall through; apparently reg-reg is not possible */
10432 } else {
10433 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10434 delta += 2+alen;
10435 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
10436 loadLE(Ity_I64, mkexpr(addr)) );
10437 DIP("movhpd %s,%s\n", dis_buf,
10438 nameXMMReg( gregOfRM(modrm) ));
10439 goto decode_success;
10440 }
10441 }
10442
10443 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
10444 /* Again, this seems identical to MOVHPS. */
10445 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x17) {
10446 if (!epartIsReg(insn[2])) {
10447 delta += 2;
10448 addr = disAMode ( &alen, sorb, delta, dis_buf );
10449 delta += alen;
10450 storeLE( mkexpr(addr),
10451 getXMMRegLane64( gregOfRM(insn[2]),
10452 1/*upper lane*/ ) );
10453 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
10454 dis_buf);
10455 goto decode_success;
10456 }
10457 /* else fall through */
10458 }
10459
10460 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
10461 /* Identical to MOVLPS ? */
10462 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x12) {
10463 modrm = getIByte(delta+2);
10464 if (epartIsReg(modrm)) {
10465 /* fall through; apparently reg-reg is not possible */
10466 } else {
10467 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10468 delta += 2+alen;
10469 putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
10470 loadLE(Ity_I64, mkexpr(addr)) );
10471 DIP("movlpd %s, %s\n",
10472 dis_buf, nameXMMReg( gregOfRM(modrm) ));
10473 goto decode_success;
10474 }
10475 }
10476
10477 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
10478 /* Identical to MOVLPS ? */
10479 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x13) {
10480 if (!epartIsReg(insn[2])) {
10481 delta += 2;
10482 addr = disAMode ( &alen, sorb, delta, dis_buf );
10483 delta += alen;
10484 storeLE( mkexpr(addr),
10485 getXMMRegLane64( gregOfRM(insn[2]),
10486 0/*lower lane*/ ) );
10487 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
10488 dis_buf);
10489 goto decode_success;
10490 }
10491 /* else fall through */
10492 }
10493
10494 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
10495 2 lowest bits of ireg(G) */
10496 if (insn[0] == 0x0F && insn[1] == 0x50) {
10497 modrm = getIByte(delta+2);
10498 if (sz == 2 && epartIsReg(modrm)) {
10499 Int src;
10500 t0 = newTemp(Ity_I32);
10501 t1 = newTemp(Ity_I32);
10502 delta += 2+1;
10503 src = eregOfRM(modrm);
10504 assign( t0, binop( Iop_And32,
10505 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(31)),
10506 mkU32(1) ));
10507 assign( t1, binop( Iop_And32,
10508 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(30)),
10509 mkU32(2) ));
10510 putIReg(4, gregOfRM(modrm),
10511 binop(Iop_Or32, mkexpr(t0), mkexpr(t1))
10512 );
10513 DIP("movmskpd %s,%s\n", nameXMMReg(src),
10514 nameIReg(4, gregOfRM(modrm)));
10515 goto decode_success;
10516 }
10517 /* else fall through */
10518 }
10519
10520 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
10521 if (insn[0] == 0x0F && insn[1] == 0xF7) {
10522 modrm = getIByte(delta+2);
10523 if (sz == 2 && epartIsReg(modrm)) {
10524 IRTemp regD = newTemp(Ity_V128);
10525 IRTemp mask = newTemp(Ity_V128);
10526 IRTemp olddata = newTemp(Ity_V128);
10527 IRTemp newdata = newTemp(Ity_V128);
10528 addr = newTemp(Ity_I32);
10529
10530 assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) ));
10531 assign( regD, getXMMReg( gregOfRM(modrm) ));
10532
10533 /* Unfortunately can't do the obvious thing with SarN8x16
10534 here since that can't be re-emitted as SSE2 code - no such
10535 insn. */
10536 assign(
10537 mask,
10538 binop(Iop_64HLtoV128,
10539 binop(Iop_SarN8x8,
10540 getXMMRegLane64( eregOfRM(modrm), 1 ),
10541 mkU8(7) ),
10542 binop(Iop_SarN8x8,
10543 getXMMRegLane64( eregOfRM(modrm), 0 ),
10544 mkU8(7) ) ));
10545 assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
10546 assign( newdata,
10547 binop(Iop_OrV128,
10548 binop(Iop_AndV128,
10549 mkexpr(regD),
10550 mkexpr(mask) ),
10551 binop(Iop_AndV128,
10552 mkexpr(olddata),
10553 unop(Iop_NotV128, mkexpr(mask)))) );
10554 storeLE( mkexpr(addr), mkexpr(newdata) );
10555
10556 delta += 2+1;
10557 DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRM(modrm) ),
10558 nameXMMReg( gregOfRM(modrm) ) );
10559 goto decode_success;
10560 }
10561 /* else fall through */
10562 }
10563
10564 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
10565 if (insn[0] == 0x0F && insn[1] == 0xE7) {
10566 modrm = getIByte(delta+2);
10567 if (sz == 2 && !epartIsReg(modrm)) {
10568 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10569 gen_SEGV_if_not_16_aligned( addr );
10570 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
10571 DIP("movntdq %s,%s\n", dis_buf,
10572 nameXMMReg(gregOfRM(modrm)));
10573 delta += 2+alen;
10574 goto decode_success;
10575 }
10576 /* else fall through */
10577 }
10578
10579 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
10580 if (insn[0] == 0x0F && insn[1] == 0xC3) {
10581 vassert(sz == 4);
10582 modrm = getIByte(delta+2);
10583 if (!epartIsReg(modrm)) {
10584 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10585 storeLE( mkexpr(addr), getIReg(4, gregOfRM(modrm)) );
10586 DIP("movnti %s,%s\n", dis_buf,
10587 nameIReg(4, gregOfRM(modrm)));
10588 delta += 2+alen;
10589 goto decode_success;
10590 }
10591 /* else fall through */
10592 }
10593
10594 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
10595 or lo half xmm). */
10596 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD6) {
10597 modrm = getIByte(delta+2);
10598 if (epartIsReg(modrm)) {
10599 /* fall through, awaiting test case */
10600 /* dst: lo half copied, hi half zeroed */
10601 } else {
10602 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10603 storeLE( mkexpr(addr),
10604 getXMMRegLane64( gregOfRM(modrm), 0 ));
10605 DIP("movq %s,%s\n", nameXMMReg(gregOfRM(modrm)), dis_buf );
10606 delta += 2+alen;
10607 goto decode_success;
10608 }
10609 }
10610
10611 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
10612 hi half). */
10613 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xD6) {
10614 vassert(sz == 4);
10615 modrm = getIByte(delta+3);
10616 if (epartIsReg(modrm)) {
10617 do_MMX_preamble();
10618 putXMMReg( gregOfRM(modrm),
10619 unop(Iop_64UtoV128, getMMXReg( eregOfRM(modrm) )) );
10620 DIP("movq2dq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
10621 nameXMMReg(gregOfRM(modrm)));
10622 delta += 3+1;
10623 goto decode_success;
10624 } else {
10625 /* fall through, apparently no mem case for this insn */
10626 }
10627 }
10628
10629 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
10630 G (lo half xmm). Upper half of G is zeroed out. */
10631 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
10632 G (lo half xmm). If E is mem, upper half of G is zeroed out.
10633 If E is reg, upper half of G is unchanged. */
10634 if ((insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x10)
10635 || (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7E)) {
10636 vassert(sz == 4);
10637 modrm = getIByte(delta+3);
10638 if (epartIsReg(modrm)) {
10639 putXMMRegLane64( gregOfRM(modrm), 0,
10640 getXMMRegLane64( eregOfRM(modrm), 0 ));
10641 if (insn[0] == 0xF3/*MOVQ*/) {
10642 /* zero bits 127:64 */
10643 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
10644 }
10645 DIP("movsd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
10646 nameXMMReg(gregOfRM(modrm)));
10647 delta += 3+1;
10648 } else {
10649 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10650 /* zero bits 127:64 */
10651 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
10652 /* write bits 63:0 */
10653 putXMMRegLane64( gregOfRM(modrm), 0,
10654 loadLE(Ity_I64, mkexpr(addr)) );
10655 DIP("movsd %s,%s\n", dis_buf,
10656 nameXMMReg(gregOfRM(modrm)));
10657 delta += 3+alen;
10658 }
10659 goto decode_success;
10660 }
10661
10662 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
10663 or lo half xmm). */
10664 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x11) {
10665 vassert(sz == 4);
10666 modrm = getIByte(delta+3);
10667 if (epartIsReg(modrm)) {
10668 putXMMRegLane64( eregOfRM(modrm), 0,
10669 getXMMRegLane64( gregOfRM(modrm), 0 ));
10670 DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
10671 nameXMMReg(eregOfRM(modrm)));
10672 delta += 3+1;
10673 } else {
10674 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
10675 storeLE( mkexpr(addr),
10676 getXMMRegLane64(gregOfRM(modrm), 0) );
10677 DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
10678 dis_buf);
10679 delta += 3+alen;
10680 }
10681 goto decode_success;
10682 }
10683
10684 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
10685 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x59) {
10686 delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulpd", Iop_Mul64Fx2 );
10687 goto decode_success;
10688 }
10689
10690 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
10691 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x59) {
10692 vassert(sz == 4);
10693 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "mulsd", Iop_Mul64F0x2 );
10694 goto decode_success;
10695 }
10696
/* 66 0F 56 = ORPD -- G = G or E */
10698 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x56) {
10699 delta = dis_SSE_E_to_G_all( sorb, delta+2, "orpd", Iop_OrV128 );
10700 goto decode_success;
10701 }
10702
10703 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
10704 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC6) {
10705 Int select;
10706 IRTemp sV = newTemp(Ity_V128);
10707 IRTemp dV = newTemp(Ity_V128);
10708 IRTemp s1 = newTemp(Ity_I64);
10709 IRTemp s0 = newTemp(Ity_I64);
10710 IRTemp d1 = newTemp(Ity_I64);
10711 IRTemp d0 = newTemp(Ity_I64);
10712
10713 modrm = insn[2];
10714 assign( dV, getXMMReg(gregOfRM(modrm)) );
10715
10716 if (epartIsReg(modrm)) {
10717 assign( sV, getXMMReg(eregOfRM(modrm)) );
10718 select = (Int)insn[3];
10719 delta += 2+2;
10720 DIP("shufpd $%d,%s,%s\n", select,
10721 nameXMMReg(eregOfRM(modrm)),
10722 nameXMMReg(gregOfRM(modrm)));
10723 } else {
10724 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10725 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10726 select = (Int)insn[2+alen];
10727 delta += 3+alen;
10728 DIP("shufpd $%d,%s,%s\n", select,
10729 dis_buf,
10730 nameXMMReg(gregOfRM(modrm)));
10731 }
10732
10733 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
10734 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
10735 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
10736 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
10737
10738 # define SELD(n) mkexpr((n)==0 ? d0 : d1)
10739 # define SELS(n) mkexpr((n)==0 ? s0 : s1)
10740
10741 putXMMReg(
10742 gregOfRM(modrm),
10743 binop(Iop_64HLtoV128, SELS((select>>1)&1), SELD((select>>0)&1) )
10744 );
10745
10746 # undef SELD
10747 # undef SELS
10748
10749 goto decode_success;
10750 }
10751
/* 66 0F 51 = SQRTPD -- sqrt 64Fx2 from R/M to R */
10753 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x51) {
10754 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
10755 "sqrtpd", Iop_Sqrt64Fx2 );
10756 goto decode_success;
10757 }
10758
/* F2 0F 51 = SQRTSD -- sqrt 64F0x2 from R/M to R */
10760 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x51) {
10761 vassert(sz == 4);
10762 delta = dis_SSE_E_to_G_unary_lo64( sorb, delta+3,
10763 "sqrtsd", Iop_Sqrt64F0x2 );
10764 goto decode_success;
10765 }
10766
10767 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
10768 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5C) {
10769 delta = dis_SSE_E_to_G_all( sorb, delta+2, "subpd", Iop_Sub64Fx2 );
10770 goto decode_success;
10771 }
10772
10773 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
10774 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5C) {
10775 vassert(sz == 4);
10776 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "subsd", Iop_Sub64F0x2 );
10777 goto decode_success;
10778 }
10779
10780 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
10781 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
10782 /* These just appear to be special cases of SHUFPS */
10783 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
10784 IRTemp s1 = newTemp(Ity_I64);
10785 IRTemp s0 = newTemp(Ity_I64);
10786 IRTemp d1 = newTemp(Ity_I64);
10787 IRTemp d0 = newTemp(Ity_I64);
10788 IRTemp sV = newTemp(Ity_V128);
10789 IRTemp dV = newTemp(Ity_V128);
10790 Bool hi = toBool(insn[1] == 0x15);
10791
10792 modrm = insn[2];
10793 assign( dV, getXMMReg(gregOfRM(modrm)) );
10794
10795 if (epartIsReg(modrm)) {
10796 assign( sV, getXMMReg(eregOfRM(modrm)) );
10797 delta += 2+1;
10798 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
10799 nameXMMReg(eregOfRM(modrm)),
10800 nameXMMReg(gregOfRM(modrm)));
10801 } else {
10802 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
10803 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10804 delta += 2+alen;
10805 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
10806 dis_buf,
10807 nameXMMReg(gregOfRM(modrm)));
10808 }
10809
10810 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
10811 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
10812 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
10813 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
10814
10815 if (hi) {
10816 putXMMReg( gregOfRM(modrm),
10817 binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
10818 } else {
10819 putXMMReg( gregOfRM(modrm),
10820 binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
10821 }
10822
10823 goto decode_success;
10824 }
10825
/* 66 0F 57 = XORPD -- G = G xor E */
10827 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x57) {
10828 delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorpd", Iop_XorV128 );
10829 goto decode_success;
10830 }
10831
10832 /* 66 0F 6B = PACKSSDW */
10833 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6B) {
10834 delta = dis_SSEint_E_to_G( sorb, delta+2,
10835 "packssdw",
10836 Iop_QNarrowBin32Sto16Sx8, True );
10837 goto decode_success;
10838 }
10839
10840 /* 66 0F 63 = PACKSSWB */
10841 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x63) {
10842 delta = dis_SSEint_E_to_G( sorb, delta+2,
10843 "packsswb",
10844 Iop_QNarrowBin16Sto8Sx16, True );
10845 goto decode_success;
10846 }
10847
10848 /* 66 0F 67 = PACKUSWB */
10849 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x67) {
10850 delta = dis_SSEint_E_to_G( sorb, delta+2,
10851 "packuswb",
10852 Iop_QNarrowBin16Sto8Ux16, True );
10853 goto decode_success;
10854 }
10855
10856 /* 66 0F FC = PADDB */
10857 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFC) {
10858 delta = dis_SSEint_E_to_G( sorb, delta+2,
10859 "paddb", Iop_Add8x16, False );
10860 goto decode_success;
10861 }
10862
10863 /* 66 0F FE = PADDD */
10864 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFE) {
10865 delta = dis_SSEint_E_to_G( sorb, delta+2,
10866 "paddd", Iop_Add32x4, False );
10867 goto decode_success;
10868 }
10869
10870 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
10871 /* 0F D4 = PADDQ -- add 64x1 */
10872 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD4) {
10873 do_MMX_preamble();
10874 delta = dis_MMXop_regmem_to_reg (
10875 sorb, delta+2, insn[1], "paddq", False );
10876 goto decode_success;
10877 }
10878
10879 /* 66 0F D4 = PADDQ */
10880 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD4) {
10881 delta = dis_SSEint_E_to_G( sorb, delta+2,
10882 "paddq", Iop_Add64x2, False );
10883 goto decode_success;
10884 }
10885
10886 /* 66 0F FD = PADDW */
10887 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFD) {
10888 delta = dis_SSEint_E_to_G( sorb, delta+2,
10889 "paddw", Iop_Add16x8, False );
10890 goto decode_success;
10891 }
10892
10893 /* 66 0F EC = PADDSB */
10894 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEC) {
10895 delta = dis_SSEint_E_to_G( sorb, delta+2,
10896 "paddsb", Iop_QAdd8Sx16, False );
10897 goto decode_success;
10898 }
10899
10900 /* 66 0F ED = PADDSW */
10901 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xED) {
10902 delta = dis_SSEint_E_to_G( sorb, delta+2,
10903 "paddsw", Iop_QAdd16Sx8, False );
10904 goto decode_success;
10905 }
10906
10907 /* 66 0F DC = PADDUSB */
10908 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDC) {
10909 delta = dis_SSEint_E_to_G( sorb, delta+2,
10910 "paddusb", Iop_QAdd8Ux16, False );
10911 goto decode_success;
10912 }
10913
10914 /* 66 0F DD = PADDUSW */
10915 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDD) {
10916 delta = dis_SSEint_E_to_G( sorb, delta+2,
10917 "paddusw", Iop_QAdd16Ux8, False );
10918 goto decode_success;
10919 }
10920
10921 /* 66 0F DB = PAND */
10922 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDB) {
10923 delta = dis_SSE_E_to_G_all( sorb, delta+2, "pand", Iop_AndV128 );
10924 goto decode_success;
10925 }
10926
10927 /* 66 0F DF = PANDN */
10928 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDF) {
10929 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "pandn", Iop_AndV128 );
10930 goto decode_success;
10931 }
10932
10933 /* 66 0F E0 = PAVGB */
10934 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE0) {
10935 delta = dis_SSEint_E_to_G( sorb, delta+2,
10936 "pavgb", Iop_Avg8Ux16, False );
10937 goto decode_success;
10938 }
10939
10940 /* 66 0F E3 = PAVGW */
10941 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE3) {
10942 delta = dis_SSEint_E_to_G( sorb, delta+2,
10943 "pavgw", Iop_Avg16Ux8, False );
10944 goto decode_success;
10945 }
10946
10947 /* 66 0F 74 = PCMPEQB */
10948 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x74) {
10949 delta = dis_SSEint_E_to_G( sorb, delta+2,
10950 "pcmpeqb", Iop_CmpEQ8x16, False );
10951 goto decode_success;
10952 }
10953
10954 /* 66 0F 76 = PCMPEQD */
10955 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x76) {
10956 delta = dis_SSEint_E_to_G( sorb, delta+2,
10957 "pcmpeqd", Iop_CmpEQ32x4, False );
10958 goto decode_success;
10959 }
10960
10961 /* 66 0F 75 = PCMPEQW */
10962 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x75) {
10963 delta = dis_SSEint_E_to_G( sorb, delta+2,
10964 "pcmpeqw", Iop_CmpEQ16x8, False );
10965 goto decode_success;
10966 }
10967
10968 /* 66 0F 64 = PCMPGTB */
10969 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x64) {
10970 delta = dis_SSEint_E_to_G( sorb, delta+2,
10971 "pcmpgtb", Iop_CmpGT8Sx16, False );
10972 goto decode_success;
10973 }
10974
10975 /* 66 0F 66 = PCMPGTD */
10976 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x66) {
10977 delta = dis_SSEint_E_to_G( sorb, delta+2,
10978 "pcmpgtd", Iop_CmpGT32Sx4, False );
10979 goto decode_success;
10980 }
10981
10982 /* 66 0F 65 = PCMPGTW */
10983 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x65) {
10984 delta = dis_SSEint_E_to_G( sorb, delta+2,
10985 "pcmpgtw", Iop_CmpGT16Sx8, False );
10986 goto decode_success;
10987 }
10988
10989 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
10990 zero-extend of it in ireg(G). */
10991 if (insn[0] == 0x0F && insn[1] == 0xC5) {
10992 modrm = insn[2];
10993 if (sz == 2 && epartIsReg(modrm)) {
10994 t5 = newTemp(Ity_V128);
10995 t4 = newTemp(Ity_I16);
10996 assign(t5, getXMMReg(eregOfRM(modrm)));
10997 breakup128to32s( t5, &t3, &t2, &t1, &t0 );
10998 switch (insn[3] & 7) {
10999 case 0: assign(t4, unop(Iop_32to16, mkexpr(t0))); break;
11000 case 1: assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break;
11001 case 2: assign(t4, unop(Iop_32to16, mkexpr(t1))); break;
11002 case 3: assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break;
11003 case 4: assign(t4, unop(Iop_32to16, mkexpr(t2))); break;
11004 case 5: assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break;
11005 case 6: assign(t4, unop(Iop_32to16, mkexpr(t3))); break;
11006 case 7: assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break;
11007 default: vassert(0); /*NOTREACHED*/
11008 }
11009 putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t4)));
11010 DIP("pextrw $%d,%s,%s\n",
11011 (Int)insn[3], nameXMMReg(eregOfRM(modrm)),
11012 nameIReg(4,gregOfRM(modrm)));
11013 delta += 4;
11014 goto decode_success;
11015 }
11016 /* else fall through */
11017 }
11018
11019 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
11020 put it into the specified lane of xmm(G). */
11021 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC4) {
11022 Int lane;
11023 t4 = newTemp(Ity_I16);
11024 modrm = insn[2];
11025
11026 if (epartIsReg(modrm)) {
11027 assign(t4, getIReg(2, eregOfRM(modrm)));
11028 delta += 3+1;
11029 lane = insn[3+1-1];
11030 DIP("pinsrw $%d,%s,%s\n", lane,
11031 nameIReg(2,eregOfRM(modrm)),
11032 nameXMMReg(gregOfRM(modrm)));
11033 } else {
11034 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11035 delta += 3+alen;
11036 lane = insn[3+alen-1];
11037 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
11038 DIP("pinsrw $%d,%s,%s\n", lane,
11039 dis_buf,
11040 nameXMMReg(gregOfRM(modrm)));
11041 }
11042
11043 putXMMRegLane16( gregOfRM(modrm), lane & 7, mkexpr(t4) );
11044 goto decode_success;
11045 }
11046
11047 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
11048 E(xmm or mem) to G(xmm) */
11049 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF5) {
11050 IRTemp s1V = newTemp(Ity_V128);
11051 IRTemp s2V = newTemp(Ity_V128);
11052 IRTemp dV = newTemp(Ity_V128);
11053 IRTemp s1Hi = newTemp(Ity_I64);
11054 IRTemp s1Lo = newTemp(Ity_I64);
11055 IRTemp s2Hi = newTemp(Ity_I64);
11056 IRTemp s2Lo = newTemp(Ity_I64);
11057 IRTemp dHi = newTemp(Ity_I64);
11058 IRTemp dLo = newTemp(Ity_I64);
11059 modrm = insn[2];
11060 if (epartIsReg(modrm)) {
11061 assign( s1V, getXMMReg(eregOfRM(modrm)) );
11062 delta += 2+1;
11063 DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11064 nameXMMReg(gregOfRM(modrm)));
11065 } else {
11066 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11067 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
11068 delta += 2+alen;
11069 DIP("pmaddwd %s,%s\n", dis_buf,
11070 nameXMMReg(gregOfRM(modrm)));
11071 }
11072 assign( s2V, getXMMReg(gregOfRM(modrm)) );
11073 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
11074 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) );
11075 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
11076 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) );
11077 assign( dHi, mkIRExprCCall(
11078 Ity_I64, 0/*regparms*/,
11079 "x86g_calculate_mmx_pmaddwd",
11080 &x86g_calculate_mmx_pmaddwd,
11081 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
11082 ));
11083 assign( dLo, mkIRExprCCall(
11084 Ity_I64, 0/*regparms*/,
11085 "x86g_calculate_mmx_pmaddwd",
11086 &x86g_calculate_mmx_pmaddwd,
11087 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
11088 ));
11089 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
11090 putXMMReg(gregOfRM(modrm), mkexpr(dV));
11091 goto decode_success;
11092 }
11093
11094 /* 66 0F EE = PMAXSW -- 16x8 signed max */
11095 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEE) {
11096 delta = dis_SSEint_E_to_G( sorb, delta+2,
11097 "pmaxsw", Iop_Max16Sx8, False );
11098 goto decode_success;
11099 }
11100
11101 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
11102 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDE) {
11103 delta = dis_SSEint_E_to_G( sorb, delta+2,
11104 "pmaxub", Iop_Max8Ux16, False );
11105 goto decode_success;
11106 }
11107
11108 /* 66 0F EA = PMINSW -- 16x8 signed min */
11109 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEA) {
11110 delta = dis_SSEint_E_to_G( sorb, delta+2,
11111 "pminsw", Iop_Min16Sx8, False );
11112 goto decode_success;
11113 }
11114
11115 /* 66 0F DA = PMINUB -- 8x16 unsigned min */
11116 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDA) {
11117 delta = dis_SSEint_E_to_G( sorb, delta+2,
11118 "pminub", Iop_Min8Ux16, False );
11119 goto decode_success;
11120 }
11121
11122 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes
11123 in xmm(E), turn them into a byte, and put zero-extend of it in
11124 ireg(G). */
11125 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD7) {
11126 modrm = insn[2];
11127 if (epartIsReg(modrm)) {
11128 t0 = newTemp(Ity_I64);
11129 t1 = newTemp(Ity_I64);
11130 assign(t0, getXMMRegLane64(eregOfRM(modrm), 0));
11131 assign(t1, getXMMRegLane64(eregOfRM(modrm), 1));
11132 t5 = newTemp(Ity_I32);
11133 assign(t5,
11134 unop(Iop_16Uto32,
11135 binop(Iop_8HLto16,
11136 unop(Iop_GetMSBs8x8, mkexpr(t1)),
11137 unop(Iop_GetMSBs8x8, mkexpr(t0)))));
11138 putIReg(4, gregOfRM(modrm), mkexpr(t5));
11139 DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11140 nameIReg(4,gregOfRM(modrm)));
11141 delta += 3;
11142 goto decode_success;
11143 }
11144 /* else fall through */
11145 }
11146
11147 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
11148 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE4) {
11149 delta = dis_SSEint_E_to_G( sorb, delta+2,
11150 "pmulhuw", Iop_MulHi16Ux8, False );
11151 goto decode_success;
11152 }
11153
11154 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
11155 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE5) {
11156 delta = dis_SSEint_E_to_G( sorb, delta+2,
11157 "pmulhw", Iop_MulHi16Sx8, False );
11158 goto decode_success;
11159 }
11160
/* 66 0F D5 = PMULLW -- 16x8 multiply */
11162 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD5) {
11163 delta = dis_SSEint_E_to_G( sorb, delta+2,
11164 "pmullw", Iop_Mul16x8, False );
11165 goto decode_success;
11166 }
11167
11168 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
11169 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
11170 0 to form 64-bit result */
11171 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF4) {
11172 IRTemp sV = newTemp(Ity_I64);
11173 IRTemp dV = newTemp(Ity_I64);
11174 t1 = newTemp(Ity_I32);
11175 t0 = newTemp(Ity_I32);
11176 modrm = insn[2];
11177
11178 do_MMX_preamble();
11179 assign( dV, getMMXReg(gregOfRM(modrm)) );
11180
11181 if (epartIsReg(modrm)) {
11182 assign( sV, getMMXReg(eregOfRM(modrm)) );
11183 delta += 2+1;
11184 DIP("pmuludq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
11185 nameMMXReg(gregOfRM(modrm)));
11186 } else {
11187 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11188 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
11189 delta += 2+alen;
11190 DIP("pmuludq %s,%s\n", dis_buf,
11191 nameMMXReg(gregOfRM(modrm)));
11192 }
11193
11194 assign( t0, unop(Iop_64to32, mkexpr(dV)) );
11195 assign( t1, unop(Iop_64to32, mkexpr(sV)) );
11196 putMMXReg( gregOfRM(modrm),
11197 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
11198 goto decode_success;
11199 }
11200
11201 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
11202 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
11203 half */
11204 /* This is a really poor translation -- could be improved if
11205 performance critical */
11206 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF4) {
11207 IRTemp sV, dV;
11208 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11209 sV = newTemp(Ity_V128);
11210 dV = newTemp(Ity_V128);
11211 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11212 t1 = newTemp(Ity_I64);
11213 t0 = newTemp(Ity_I64);
11214 modrm = insn[2];
11215 assign( dV, getXMMReg(gregOfRM(modrm)) );
11216
11217 if (epartIsReg(modrm)) {
11218 assign( sV, getXMMReg(eregOfRM(modrm)) );
11219 delta += 2+1;
11220 DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11221 nameXMMReg(gregOfRM(modrm)));
11222 } else {
11223 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11224 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11225 delta += 2+alen;
11226 DIP("pmuludq %s,%s\n", dis_buf,
11227 nameXMMReg(gregOfRM(modrm)));
11228 }
11229
11230 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
11231 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
11232
11233 assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) );
11234 putXMMRegLane64( gregOfRM(modrm), 0, mkexpr(t0) );
11235 assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) );
11236 putXMMRegLane64( gregOfRM(modrm), 1, mkexpr(t1) );
11237 goto decode_success;
11238 }
11239
11240 /* 66 0F EB = POR */
11241 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEB) {
11242 delta = dis_SSE_E_to_G_all( sorb, delta+2, "por", Iop_OrV128 );
11243 goto decode_success;
11244 }
11245
11246 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
11247 from E(xmm or mem) to G(xmm) */
11248 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF6) {
11249 IRTemp s1V = newTemp(Ity_V128);
11250 IRTemp s2V = newTemp(Ity_V128);
11251 IRTemp dV = newTemp(Ity_V128);
11252 IRTemp s1Hi = newTemp(Ity_I64);
11253 IRTemp s1Lo = newTemp(Ity_I64);
11254 IRTemp s2Hi = newTemp(Ity_I64);
11255 IRTemp s2Lo = newTemp(Ity_I64);
11256 IRTemp dHi = newTemp(Ity_I64);
11257 IRTemp dLo = newTemp(Ity_I64);
11258 modrm = insn[2];
11259 if (epartIsReg(modrm)) {
11260 assign( s1V, getXMMReg(eregOfRM(modrm)) );
11261 delta += 2+1;
11262 DIP("psadbw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11263 nameXMMReg(gregOfRM(modrm)));
11264 } else {
11265 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11266 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
11267 delta += 2+alen;
11268 DIP("psadbw %s,%s\n", dis_buf,
11269 nameXMMReg(gregOfRM(modrm)));
11270 }
11271 assign( s2V, getXMMReg(gregOfRM(modrm)) );
11272 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
11273 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) );
11274 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
11275 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) );
11276 assign( dHi, mkIRExprCCall(
11277 Ity_I64, 0/*regparms*/,
11278 "x86g_calculate_mmx_psadbw",
11279 &x86g_calculate_mmx_psadbw,
11280 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
11281 ));
11282 assign( dLo, mkIRExprCCall(
11283 Ity_I64, 0/*regparms*/,
11284 "x86g_calculate_mmx_psadbw",
11285 &x86g_calculate_mmx_psadbw,
11286 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
11287 ));
11288 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
11289 putXMMReg(gregOfRM(modrm), mkexpr(dV));
11290 goto decode_success;
11291 }
11292
11293 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
11294 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x70) {
11295 Int order;
11296 IRTemp sV, dV, s3, s2, s1, s0;
11297 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11298 sV = newTemp(Ity_V128);
11299 dV = newTemp(Ity_V128);
11300 modrm = insn[2];
11301 if (epartIsReg(modrm)) {
11302 assign( sV, getXMMReg(eregOfRM(modrm)) );
11303 order = (Int)insn[3];
11304 delta += 2+2;
11305 DIP("pshufd $%d,%s,%s\n", order,
11306 nameXMMReg(eregOfRM(modrm)),
11307 nameXMMReg(gregOfRM(modrm)));
11308 } else {
11309 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11310 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11311 order = (Int)insn[2+alen];
11312 delta += 3+alen;
11313 DIP("pshufd $%d,%s,%s\n", order,
11314 dis_buf,
11315 nameXMMReg(gregOfRM(modrm)));
11316 }
11317 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
11318
11319 # define SEL(n) \
11320 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11321 assign(dV,
11322 mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
11323 SEL((order>>2)&3), SEL((order>>0)&3) )
11324 );
11325 putXMMReg(gregOfRM(modrm), mkexpr(dV));
11326 # undef SEL
11327 goto decode_success;
11328 }
11329
11330 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
11331 mem) to G(xmm), and copy lower half */
11332 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x70) {
11333 Int order;
11334 IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0;
11335 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11336 sV = newTemp(Ity_V128);
11337 dV = newTemp(Ity_V128);
11338 sVhi = newTemp(Ity_I64);
11339 dVhi = newTemp(Ity_I64);
11340 modrm = insn[3];
11341 if (epartIsReg(modrm)) {
11342 assign( sV, getXMMReg(eregOfRM(modrm)) );
11343 order = (Int)insn[4];
11344 delta += 4+1;
11345 DIP("pshufhw $%d,%s,%s\n", order,
11346 nameXMMReg(eregOfRM(modrm)),
11347 nameXMMReg(gregOfRM(modrm)));
11348 } else {
11349 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11350 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11351 order = (Int)insn[3+alen];
11352 delta += 4+alen;
11353 DIP("pshufhw $%d,%s,%s\n", order,
11354 dis_buf,
11355 nameXMMReg(gregOfRM(modrm)));
11356 }
11357 assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) );
11358 breakup64to16s( sVhi, &s3, &s2, &s1, &s0 );
11359
11360 # define SEL(n) \
11361 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11362 assign(dVhi,
11363 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
11364 SEL((order>>2)&3), SEL((order>>0)&3) )
11365 );
11366 assign(dV, binop( Iop_64HLtoV128,
11367 mkexpr(dVhi),
11368 unop(Iop_V128to64, mkexpr(sV))) );
11369 putXMMReg(gregOfRM(modrm), mkexpr(dV));
11370 # undef SEL
11371 goto decode_success;
11372 }
11373
11374 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
11375 mem) to G(xmm), and copy upper half */
11376 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x70) {
11377 Int order;
11378 IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0;
11379 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11380 sV = newTemp(Ity_V128);
11381 dV = newTemp(Ity_V128);
11382 sVlo = newTemp(Ity_I64);
11383 dVlo = newTemp(Ity_I64);
11384 modrm = insn[3];
11385 if (epartIsReg(modrm)) {
11386 assign( sV, getXMMReg(eregOfRM(modrm)) );
11387 order = (Int)insn[4];
11388 delta += 4+1;
11389 DIP("pshuflw $%d,%s,%s\n", order,
11390 nameXMMReg(eregOfRM(modrm)),
11391 nameXMMReg(gregOfRM(modrm)));
11392 } else {
11393 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11394 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11395 order = (Int)insn[3+alen];
11396 delta += 4+alen;
11397 DIP("pshuflw $%d,%s,%s\n", order,
11398 dis_buf,
11399 nameXMMReg(gregOfRM(modrm)));
11400 }
11401 assign( sVlo, unop(Iop_V128to64, mkexpr(sV)) );
11402 breakup64to16s( sVlo, &s3, &s2, &s1, &s0 );
11403
11404 # define SEL(n) \
11405 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11406 assign(dVlo,
11407 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
11408 SEL((order>>2)&3), SEL((order>>0)&3) )
11409 );
11410 assign(dV, binop( Iop_64HLtoV128,
11411 unop(Iop_V128HIto64, mkexpr(sV)),
11412 mkexpr(dVlo) ) );
11413 putXMMReg(gregOfRM(modrm), mkexpr(dV));
11414 # undef SEL
11415 goto decode_success;
11416 }
11417
11418 /* 66 0F 72 /6 ib = PSLLD by immediate */
11419 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
11420 && epartIsReg(insn[2])
11421 && gregOfRM(insn[2]) == 6) {
11422 delta = dis_SSE_shiftE_imm( delta+2, "pslld", Iop_ShlN32x4 );
11423 goto decode_success;
11424 }
11425
11426 /* 66 0F F2 = PSLLD by E */
11427 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF2) {
11428 delta = dis_SSE_shiftG_byE( sorb, delta+2, "pslld", Iop_ShlN32x4 );
11429 goto decode_success;
11430 }
11431
11432 /* 66 0F 73 /7 ib = PSLLDQ by immediate */
11433 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
11434 && epartIsReg(insn[2])
11435 && gregOfRM(insn[2]) == 7) {
11436 IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
11437 Int imm = (Int)insn[3];
11438 Int reg = eregOfRM(insn[2]);
11439 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
11440 vassert(imm >= 0 && imm <= 255);
11441 delta += 4;
11442
11443 sV = newTemp(Ity_V128);
11444 dV = newTemp(Ity_V128);
11445 hi64 = newTemp(Ity_I64);
11446 lo64 = newTemp(Ity_I64);
11447 hi64r = newTemp(Ity_I64);
11448 lo64r = newTemp(Ity_I64);
11449
11450 if (imm >= 16) {
11451 putXMMReg(reg, mkV128(0x0000));
11452 goto decode_success;
11453 }
11454
11455 assign( sV, getXMMReg(reg) );
11456 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
11457 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
11458
11459 if (imm == 0) {
11460 assign( lo64r, mkexpr(lo64) );
11461 assign( hi64r, mkexpr(hi64) );
11462 }
11463 else
11464 if (imm == 8) {
11465 assign( lo64r, mkU64(0) );
11466 assign( hi64r, mkexpr(lo64) );
11467 }
11468 else
11469 if (imm > 8) {
11470 assign( lo64r, mkU64(0) );
11471 assign( hi64r, binop( Iop_Shl64,
11472 mkexpr(lo64),
11473 mkU8( 8*(imm-8) ) ));
11474 } else {
11475 assign( lo64r, binop( Iop_Shl64,
11476 mkexpr(lo64),
11477 mkU8(8 * imm) ));
11478 assign( hi64r,
11479 binop( Iop_Or64,
11480 binop(Iop_Shl64, mkexpr(hi64),
11481 mkU8(8 * imm)),
11482 binop(Iop_Shr64, mkexpr(lo64),
11483 mkU8(8 * (8 - imm)) )
11484 )
11485 );
11486 }
11487 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
11488 putXMMReg(reg, mkexpr(dV));
11489 goto decode_success;
11490 }
11491
11492 /* 66 0F 73 /6 ib = PSLLQ by immediate */
11493 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
11494 && epartIsReg(insn[2])
11495 && gregOfRM(insn[2]) == 6) {
11496 delta = dis_SSE_shiftE_imm( delta+2, "psllq", Iop_ShlN64x2 );
11497 goto decode_success;
11498 }
11499
11500 /* 66 0F F3 = PSLLQ by E */
11501 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF3) {
11502 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllq", Iop_ShlN64x2 );
11503 goto decode_success;
11504 }
11505
11506 /* 66 0F 71 /6 ib = PSLLW by immediate */
11507 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
11508 && epartIsReg(insn[2])
11509 && gregOfRM(insn[2]) == 6) {
11510 delta = dis_SSE_shiftE_imm( delta+2, "psllw", Iop_ShlN16x8 );
11511 goto decode_success;
11512 }
11513
11514 /* 66 0F F1 = PSLLW by E */
11515 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF1) {
11516 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllw", Iop_ShlN16x8 );
11517 goto decode_success;
11518 }
11519
11520 /* 66 0F 72 /4 ib = PSRAD by immediate */
11521 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
11522 && epartIsReg(insn[2])
11523 && gregOfRM(insn[2]) == 4) {
11524 delta = dis_SSE_shiftE_imm( delta+2, "psrad", Iop_SarN32x4 );
11525 goto decode_success;
11526 }
11527
11528 /* 66 0F E2 = PSRAD by E */
11529 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE2) {
11530 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrad", Iop_SarN32x4 );
11531 goto decode_success;
11532 }
11533
11534 /* 66 0F 71 /4 ib = PSRAW by immediate */
11535 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
11536 && epartIsReg(insn[2])
11537 && gregOfRM(insn[2]) == 4) {
11538 delta = dis_SSE_shiftE_imm( delta+2, "psraw", Iop_SarN16x8 );
11539 goto decode_success;
11540 }
11541
11542 /* 66 0F E1 = PSRAW by E */
11543 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE1) {
11544 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psraw", Iop_SarN16x8 );
11545 goto decode_success;
11546 }
11547
11548 /* 66 0F 72 /2 ib = PSRLD by immediate */
11549 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
11550 && epartIsReg(insn[2])
11551 && gregOfRM(insn[2]) == 2) {
11552 delta = dis_SSE_shiftE_imm( delta+2, "psrld", Iop_ShrN32x4 );
11553 goto decode_success;
11554 }
11555
11556 /* 66 0F D2 = PSRLD by E */
11557 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD2) {
11558 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrld", Iop_ShrN32x4 );
11559 goto decode_success;
11560 }
11561
11562 /* 66 0F 73 /3 ib = PSRLDQ by immediate */
11563 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
11564 && epartIsReg(insn[2])
11565 && gregOfRM(insn[2]) == 3) {
11566 IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
11567 Int imm = (Int)insn[3];
11568 Int reg = eregOfRM(insn[2]);
11569 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
11570 vassert(imm >= 0 && imm <= 255);
11571 delta += 4;
11572
11573 sV = newTemp(Ity_V128);
11574 dV = newTemp(Ity_V128);
11575 hi64 = newTemp(Ity_I64);
11576 lo64 = newTemp(Ity_I64);
11577 hi64r = newTemp(Ity_I64);
11578 lo64r = newTemp(Ity_I64);
11579
11580 if (imm >= 16) {
11581 putXMMReg(reg, mkV128(0x0000));
11582 goto decode_success;
11583 }
11584
11585 assign( sV, getXMMReg(reg) );
11586 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
11587 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
11588
11589 if (imm == 0) {
11590 assign( lo64r, mkexpr(lo64) );
11591 assign( hi64r, mkexpr(hi64) );
11592 }
11593 else
11594 if (imm == 8) {
11595 assign( hi64r, mkU64(0) );
11596 assign( lo64r, mkexpr(hi64) );
11597 }
11598 else
11599 if (imm > 8) {
11600 assign( hi64r, mkU64(0) );
11601 assign( lo64r, binop( Iop_Shr64,
11602 mkexpr(hi64),
11603 mkU8( 8*(imm-8) ) ));
11604 } else {
11605 assign( hi64r, binop( Iop_Shr64,
11606 mkexpr(hi64),
11607 mkU8(8 * imm) ));
11608 assign( lo64r,
11609 binop( Iop_Or64,
11610 binop(Iop_Shr64, mkexpr(lo64),
11611 mkU8(8 * imm)),
11612 binop(Iop_Shl64, mkexpr(hi64),
11613 mkU8(8 * (8 - imm)) )
11614 )
11615 );
11616 }
11617
11618 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
11619 putXMMReg(reg, mkexpr(dV));
11620 goto decode_success;
11621 }
11622
11623 /* 66 0F 73 /2 ib = PSRLQ by immediate */
11624 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
11625 && epartIsReg(insn[2])
11626 && gregOfRM(insn[2]) == 2) {
11627 delta = dis_SSE_shiftE_imm( delta+2, "psrlq", Iop_ShrN64x2 );
11628 goto decode_success;
11629 }
11630
11631 /* 66 0F D3 = PSRLQ by E */
11632 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD3) {
11633 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlq", Iop_ShrN64x2 );
11634 goto decode_success;
11635 }
11636
11637 /* 66 0F 71 /2 ib = PSRLW by immediate */
11638 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
11639 && epartIsReg(insn[2])
11640 && gregOfRM(insn[2]) == 2) {
11641 delta = dis_SSE_shiftE_imm( delta+2, "psrlw", Iop_ShrN16x8 );
11642 goto decode_success;
11643 }
11644
11645 /* 66 0F D1 = PSRLW by E */
11646 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD1) {
11647 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlw", Iop_ShrN16x8 );
11648 goto decode_success;
11649 }
11650
11651 /* 66 0F F8 = PSUBB */
11652 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF8) {
11653 delta = dis_SSEint_E_to_G( sorb, delta+2,
11654 "psubb", Iop_Sub8x16, False );
11655 goto decode_success;
11656 }
11657
11658 /* 66 0F FA = PSUBD */
11659 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFA) {
11660 delta = dis_SSEint_E_to_G( sorb, delta+2,
11661 "psubd", Iop_Sub32x4, False );
11662 goto decode_success;
11663 }
11664
11665 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
11666 /* 0F FB = PSUBQ -- sub 64x1 */
11667 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xFB) {
11668 do_MMX_preamble();
11669 delta = dis_MMXop_regmem_to_reg (
11670 sorb, delta+2, insn[1], "psubq", False );
11671 goto decode_success;
11672 }
11673
11674 /* 66 0F FB = PSUBQ */
11675 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFB) {
11676 delta = dis_SSEint_E_to_G( sorb, delta+2,
11677 "psubq", Iop_Sub64x2, False );
11678 goto decode_success;
11679 }
11680
11681 /* 66 0F F9 = PSUBW */
11682 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF9) {
11683 delta = dis_SSEint_E_to_G( sorb, delta+2,
11684 "psubw", Iop_Sub16x8, False );
11685 goto decode_success;
11686 }
11687
11688 /* 66 0F E8 = PSUBSB */
11689 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE8) {
11690 delta = dis_SSEint_E_to_G( sorb, delta+2,
11691 "psubsb", Iop_QSub8Sx16, False );
11692 goto decode_success;
11693 }
11694
11695 /* 66 0F E9 = PSUBSW */
11696 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE9) {
11697 delta = dis_SSEint_E_to_G( sorb, delta+2,
11698 "psubsw", Iop_QSub16Sx8, False );
11699 goto decode_success;
11700 }
11701
11702 /* 66 0F D8 = PSUBUSB */
11703 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD8) {
11704 delta = dis_SSEint_E_to_G( sorb, delta+2,
11705 "psubusb", Iop_QSub8Ux16, False );
11706 goto decode_success;
11707 }
11708
11709 /* 66 0F D9 = PSUBUSW */
11710 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD9) {
11711 delta = dis_SSEint_E_to_G( sorb, delta+2,
11712 "psubusw", Iop_QSub16Ux8, False );
11713 goto decode_success;
11714 }
11715
11716 /* 66 0F 68 = PUNPCKHBW */
11717 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x68) {
11718 delta = dis_SSEint_E_to_G( sorb, delta+2,
11719 "punpckhbw",
11720 Iop_InterleaveHI8x16, True );
11721 goto decode_success;
11722 }
11723
11724 /* 66 0F 6A = PUNPCKHDQ */
11725 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6A) {
11726 delta = dis_SSEint_E_to_G( sorb, delta+2,
11727 "punpckhdq",
11728 Iop_InterleaveHI32x4, True );
11729 goto decode_success;
11730 }
11731
11732 /* 66 0F 6D = PUNPCKHQDQ */
11733 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6D) {
11734 delta = dis_SSEint_E_to_G( sorb, delta+2,
11735 "punpckhqdq",
11736 Iop_InterleaveHI64x2, True );
11737 goto decode_success;
11738 }
11739
11740 /* 66 0F 69 = PUNPCKHWD */
11741 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x69) {
11742 delta = dis_SSEint_E_to_G( sorb, delta+2,
11743 "punpckhwd",
11744 Iop_InterleaveHI16x8, True );
11745 goto decode_success;
11746 }
11747
11748 /* 66 0F 60 = PUNPCKLBW */
11749 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x60) {
11750 delta = dis_SSEint_E_to_G( sorb, delta+2,
11751 "punpcklbw",
11752 Iop_InterleaveLO8x16, True );
11753 goto decode_success;
11754 }
11755
11756 /* 66 0F 62 = PUNPCKLDQ */
11757 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x62) {
11758 delta = dis_SSEint_E_to_G( sorb, delta+2,
11759 "punpckldq",
11760 Iop_InterleaveLO32x4, True );
11761 goto decode_success;
11762 }
11763
11764 /* 66 0F 6C = PUNPCKLQDQ */
11765 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6C) {
11766 delta = dis_SSEint_E_to_G( sorb, delta+2,
11767 "punpcklqdq",
11768 Iop_InterleaveLO64x2, True );
11769 goto decode_success;
11770 }
11771
11772 /* 66 0F 61 = PUNPCKLWD */
11773 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x61) {
11774 delta = dis_SSEint_E_to_G( sorb, delta+2,
11775 "punpcklwd",
11776 Iop_InterleaveLO16x8, True );
11777 goto decode_success;
11778 }
11779
11780 /* 66 0F EF = PXOR */
11781 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEF) {
11782 delta = dis_SSE_E_to_G_all( sorb, delta+2, "pxor", Iop_XorV128 );
11783 goto decode_success;
11784 }
11785
11786 //-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */
11787 //-- if (insn[0] == 0x0F && insn[1] == 0xAE
11788 //-- && (!epartIsReg(insn[2]))
11789 //-- && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) {
11790 //-- Bool store = gregOfRM(insn[2]) == 0;
11791 //-- vg_assert(sz == 4);
11792 //-- pair = disAMode ( cb, sorb, eip+2, dis_buf );
11793 //-- t1 = LOW24(pair);
11794 //-- eip += 2+HI8(pair);
11795 //-- uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512,
11796 //-- Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1],
11797 //-- Lit16, (UShort)insn[2],
11798 //-- TempReg, t1 );
11799 //-- DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf );
11800 //-- goto decode_success;
11801 //-- }
11802
11803 /* 0F AE /7 = CLFLUSH -- flush cache line */
11804 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
11805 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {
11806
11807 /* This is something of a hack. We need to know the size of the
11808 cache line containing addr. Since we don't (easily), assume
11809 256 on the basis that no real cache would have a line that
11810 big. It's safe to invalidate more stuff than we need, just
11811 inefficient. */
11812 UInt lineszB = 256;
11813
11814 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11815 delta += 2+alen;
11816
11817 /* Round addr down to the start of the containing block. */
11818 stmt( IRStmt_Put(
11819 OFFB_CMSTART,
11820 binop( Iop_And32,
11821 mkexpr(addr),
11822 mkU32( ~(lineszB-1) ))) );
11823
11824 stmt( IRStmt_Put(OFFB_CMLEN, mkU32(lineszB) ) );
11825
11826 jmp_lit(&dres, Ijk_InvalICache, (Addr32)(guest_EIP_bbstart+delta));
11827
11828 DIP("clflush %s\n", dis_buf);
11829 goto decode_success;
11830 }
11831
11832 /* ---------------------------------------------------- */
11833 /* --- end of the SSE2 decoder. --- */
11834 /* ---------------------------------------------------- */
11835
11836 /* ---------------------------------------------------- */
11837 /* --- start of the SSE3 decoder. --- */
11838 /* ---------------------------------------------------- */
11839
11840 /* Skip parts of the decoder which don't apply given the stated
11841 guest subarchitecture. */
11842 if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3))
11843 goto after_sse_decoders; /* no SSE3 capabilities */
11844
11845 insn = &guest_code[delta];
11846
11847 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
11848 duplicating some lanes (2:2:0:0). */
11849 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
11850 duplicating some lanes (3:3:1:1). */
11851 if (sz == 4 && insn[0] == 0xF3 && insn[1] == 0x0F
11852 && (insn[2] == 0x12 || insn[2] == 0x16)) {
11853 IRTemp s3, s2, s1, s0;
11854 IRTemp sV = newTemp(Ity_V128);
11855 Bool isH = insn[2] == 0x16;
11856 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11857
11858 modrm = insn[3];
11859 if (epartIsReg(modrm)) {
11860 assign( sV, getXMMReg( eregOfRM(modrm)) );
11861 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
11862 nameXMMReg(eregOfRM(modrm)),
11863 nameXMMReg(gregOfRM(modrm)));
11864 delta += 3+1;
11865 } else {
11866 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11867 gen_SEGV_if_not_16_aligned( addr );
11868 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11869 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
11870 dis_buf,
11871 nameXMMReg(gregOfRM(modrm)));
11872 delta += 3+alen;
11873 }
11874
11875 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
11876 putXMMReg( gregOfRM(modrm),
11877 isH ? mk128from32s( s3, s3, s1, s1 )
11878 : mk128from32s( s2, s2, s0, s0 ) );
11879 goto decode_success;
11880 }
11881
11882 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
11883 duplicating some lanes (1:0:1:0). */
11884 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x12) {
11885 IRTemp sV = newTemp(Ity_V128);
11886 IRTemp d0 = newTemp(Ity_I64);
11887
11888 modrm = insn[3];
11889 if (epartIsReg(modrm)) {
11890 assign( sV, getXMMReg( eregOfRM(modrm)) );
11891 DIP("movddup %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11892 nameXMMReg(gregOfRM(modrm)));
11893 delta += 3+1;
11894 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
11895 } else {
11896 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11897 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
11898 DIP("movddup %s,%s\n", dis_buf,
11899 nameXMMReg(gregOfRM(modrm)));
11900 delta += 3+alen;
11901 }
11902
11903 putXMMReg( gregOfRM(modrm), binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
11904 goto decode_success;
11905 }
11906
11907 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
11908 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD0) {
11909 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
11910 IRTemp eV = newTemp(Ity_V128);
11911 IRTemp gV = newTemp(Ity_V128);
11912 IRTemp addV = newTemp(Ity_V128);
11913 IRTemp subV = newTemp(Ity_V128);
11914 IRTemp rm = newTemp(Ity_I32);
11915 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11916
11917 modrm = insn[3];
11918 if (epartIsReg(modrm)) {
11919 assign( eV, getXMMReg( eregOfRM(modrm)) );
11920 DIP("addsubps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11921 nameXMMReg(gregOfRM(modrm)));
11922 delta += 3+1;
11923 } else {
11924 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
11925 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
11926 DIP("addsubps %s,%s\n", dis_buf,
11927 nameXMMReg(gregOfRM(modrm)));
11928 delta += 3+alen;
11929 }
11930
11931 assign( gV, getXMMReg(gregOfRM(modrm)) );
11932
11933 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11934 assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );
11935 assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );
11936
11937 breakup128to32s( addV, &a3, &a2, &a1, &a0 );
11938 breakup128to32s( subV, &s3, &s2, &s1, &s0 );
11939
11940 putXMMReg( gregOfRM(modrm), mk128from32s( a3, s2, a1, s0 ));
11941 goto decode_success;
11942 }
11943
11944 /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
11945 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD0) {
11946 IRTemp eV = newTemp(Ity_V128);
11947 IRTemp gV = newTemp(Ity_V128);
11948 IRTemp addV = newTemp(Ity_V128);
11949 IRTemp subV = newTemp(Ity_V128);
11950 IRTemp a1 = newTemp(Ity_I64);
11951 IRTemp s0 = newTemp(Ity_I64);
11952 IRTemp rm = newTemp(Ity_I32);
11953
11954 modrm = insn[2];
11955 if (epartIsReg(modrm)) {
11956 assign( eV, getXMMReg( eregOfRM(modrm)) );
11957 DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
11958 nameXMMReg(gregOfRM(modrm)));
11959 delta += 2+1;
11960 } else {
11961 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
11962 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
11963 DIP("addsubpd %s,%s\n", dis_buf,
11964 nameXMMReg(gregOfRM(modrm)));
11965 delta += 2+alen;
11966 }
11967
11968 assign( gV, getXMMReg(gregOfRM(modrm)) );
11969
11970 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11971 assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );
11972 assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );
11973
11974 assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
11975 assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
11976
11977 putXMMReg( gregOfRM(modrm),
11978 binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
11979 goto decode_success;
11980 }
11981
11982 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
11983 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
11984 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F
11985 && (insn[2] == 0x7C || insn[2] == 0x7D)) {
11986 IRTemp e3, e2, e1, e0, g3, g2, g1, g0;
11987 IRTemp eV = newTemp(Ity_V128);
11988 IRTemp gV = newTemp(Ity_V128);
11989 IRTemp leftV = newTemp(Ity_V128);
11990 IRTemp rightV = newTemp(Ity_V128);
11991 IRTemp rm = newTemp(Ity_I32);
11992 Bool isAdd = insn[2] == 0x7C;
11993 const HChar* str = isAdd ? "add" : "sub";
11994 e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID;
11995
11996 modrm = insn[3];
11997 if (epartIsReg(modrm)) {
11998 assign( eV, getXMMReg( eregOfRM(modrm)) );
11999 DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
12000 nameXMMReg(gregOfRM(modrm)));
12001 delta += 3+1;
12002 } else {
12003 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12004 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
12005 DIP("h%sps %s,%s\n", str, dis_buf,
12006 nameXMMReg(gregOfRM(modrm)));
12007 delta += 3+alen;
12008 }
12009
12010 assign( gV, getXMMReg(gregOfRM(modrm)) );
12011
12012 breakup128to32s( eV, &e3, &e2, &e1, &e0 );
12013 breakup128to32s( gV, &g3, &g2, &g1, &g0 );
12014
12015 assign( leftV, mk128from32s( e2, e0, g2, g0 ) );
12016 assign( rightV, mk128from32s( e3, e1, g3, g1 ) );
12017
12018 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
12019 putXMMReg( gregOfRM(modrm),
12020 triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
12021 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
12022 goto decode_success;
12023 }
12024
12025 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
12026 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
12027 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) {
12028 IRTemp e1 = newTemp(Ity_I64);
12029 IRTemp e0 = newTemp(Ity_I64);
12030 IRTemp g1 = newTemp(Ity_I64);
12031 IRTemp g0 = newTemp(Ity_I64);
12032 IRTemp eV = newTemp(Ity_V128);
12033 IRTemp gV = newTemp(Ity_V128);
12034 IRTemp leftV = newTemp(Ity_V128);
12035 IRTemp rightV = newTemp(Ity_V128);
12036 IRTemp rm = newTemp(Ity_I32);
12037 Bool isAdd = insn[1] == 0x7C;
12038 const HChar* str = isAdd ? "add" : "sub";
12039
12040 modrm = insn[2];
12041 if (epartIsReg(modrm)) {
12042 assign( eV, getXMMReg( eregOfRM(modrm)) );
12043 DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
12044 nameXMMReg(gregOfRM(modrm)));
12045 delta += 2+1;
12046 } else {
12047 addr = disAMode ( &alen, sorb, delta+2, dis_buf );
12048 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
12049 DIP("h%spd %s,%s\n", str, dis_buf,
12050 nameXMMReg(gregOfRM(modrm)));
12051 delta += 2+alen;
12052 }
12053
12054 assign( gV, getXMMReg(gregOfRM(modrm)) );
12055
12056 assign( e1, unop(Iop_V128HIto64, mkexpr(eV) ));
12057 assign( e0, unop(Iop_V128to64, mkexpr(eV) ));
12058 assign( g1, unop(Iop_V128HIto64, mkexpr(gV) ));
12059 assign( g0, unop(Iop_V128to64, mkexpr(gV) ));
12060
12061 assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) );
12062 assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) );
12063
12064 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
12065 putXMMReg( gregOfRM(modrm),
12066 triop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
12067 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
12068 goto decode_success;
12069 }
12070
12071 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
12072 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xF0) {
12073 modrm = getIByte(delta+3);
12074 if (epartIsReg(modrm)) {
12075 goto decode_failure;
12076 } else {
12077 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12078 putXMMReg( gregOfRM(modrm),
12079 loadLE(Ity_V128, mkexpr(addr)) );
12080 DIP("lddqu %s,%s\n", dis_buf,
12081 nameXMMReg(gregOfRM(modrm)));
12082 delta += 3+alen;
12083 }
12084 goto decode_success;
12085 }
12086
12087 /* ---------------------------------------------------- */
12088 /* --- end of the SSE3 decoder. --- */
12089 /* ---------------------------------------------------- */
12090
12091 /* ---------------------------------------------------- */
12092 /* --- start of the SSSE3 decoder. --- */
12093 /* ---------------------------------------------------- */
12094
12095 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
12096 Unsigned Bytes (MMX) */
12097 if (sz == 4
12098 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
12099 IRTemp sV = newTemp(Ity_I64);
12100 IRTemp dV = newTemp(Ity_I64);
12101 IRTemp sVoddsSX = newTemp(Ity_I64);
12102 IRTemp sVevensSX = newTemp(Ity_I64);
12103 IRTemp dVoddsZX = newTemp(Ity_I64);
12104 IRTemp dVevensZX = newTemp(Ity_I64);
12105
12106 modrm = insn[3];
12107 do_MMX_preamble();
12108 assign( dV, getMMXReg(gregOfRM(modrm)) );
12109
12110 if (epartIsReg(modrm)) {
12111 assign( sV, getMMXReg(eregOfRM(modrm)) );
12112 delta += 3+1;
12113 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
12114 nameMMXReg(gregOfRM(modrm)));
12115 } else {
12116 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12117 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12118 delta += 3+alen;
12119 DIP("pmaddubsw %s,%s\n", dis_buf,
12120 nameMMXReg(gregOfRM(modrm)));
12121 }
12122
12123 /* compute dV unsigned x sV signed */
12124 assign( sVoddsSX,
12125 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
12126 assign( sVevensSX,
12127 binop(Iop_SarN16x4,
12128 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
12129 mkU8(8)) );
12130 assign( dVoddsZX,
12131 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
12132 assign( dVevensZX,
12133 binop(Iop_ShrN16x4,
12134 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
12135 mkU8(8)) );
12136
12137 putMMXReg(
12138 gregOfRM(modrm),
12139 binop(Iop_QAdd16Sx4,
12140 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
12141 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
12142 )
12143 );
12144 goto decode_success;
12145 }
12146
12147 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
12148 Unsigned Bytes (XMM) */
12149 if (sz == 2
12150 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
12151 IRTemp sV = newTemp(Ity_V128);
12152 IRTemp dV = newTemp(Ity_V128);
12153 IRTemp sVoddsSX = newTemp(Ity_V128);
12154 IRTemp sVevensSX = newTemp(Ity_V128);
12155 IRTemp dVoddsZX = newTemp(Ity_V128);
12156 IRTemp dVevensZX = newTemp(Ity_V128);
12157
12158 modrm = insn[3];
12159 assign( dV, getXMMReg(gregOfRM(modrm)) );
12160
12161 if (epartIsReg(modrm)) {
12162 assign( sV, getXMMReg(eregOfRM(modrm)) );
12163 delta += 3+1;
12164 DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
12165 nameXMMReg(gregOfRM(modrm)));
12166 } else {
12167 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12168 gen_SEGV_if_not_16_aligned( addr );
12169 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12170 delta += 3+alen;
12171 DIP("pmaddubsw %s,%s\n", dis_buf,
12172 nameXMMReg(gregOfRM(modrm)));
12173 }
12174
12175 /* compute dV unsigned x sV signed */
12176 assign( sVoddsSX,
12177 binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
12178 assign( sVevensSX,
12179 binop(Iop_SarN16x8,
12180 binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)),
12181 mkU8(8)) );
12182 assign( dVoddsZX,
12183 binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
12184 assign( dVevensZX,
12185 binop(Iop_ShrN16x8,
12186 binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
12187 mkU8(8)) );
12188
12189 putXMMReg(
12190 gregOfRM(modrm),
12191 binop(Iop_QAdd16Sx8,
12192 binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
12193 binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
12194 )
12195 );
12196 goto decode_success;
12197 }
12198
12199 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
12200 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
12201 mmx) and G to G (mmx). */
12202 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
12203 mmx) and G to G (mmx). */
12204 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
12205 to G (mmx). */
12206 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
12207 to G (mmx). */
12208 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
12209 to G (mmx). */
12210 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
12211 to G (mmx). */
12212
12213 if (sz == 4
12214 && insn[0] == 0x0F && insn[1] == 0x38
12215 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
12216 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
12217 const HChar* str = "???";
12218 IROp opV64 = Iop_INVALID;
12219 IROp opCatO = Iop_CatOddLanes16x4;
12220 IROp opCatE = Iop_CatEvenLanes16x4;
12221 IRTemp sV = newTemp(Ity_I64);
12222 IRTemp dV = newTemp(Ity_I64);
12223
12224 modrm = insn[3];
12225
12226 switch (insn[2]) {
12227 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
12228 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
12229 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
12230 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
12231 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
12232 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
12233 default: vassert(0);
12234 }
12235 if (insn[2] == 0x02 || insn[2] == 0x06) {
12236 opCatO = Iop_InterleaveHI32x2;
12237 opCatE = Iop_InterleaveLO32x2;
12238 }
12239
12240 do_MMX_preamble();
12241 assign( dV, getMMXReg(gregOfRM(modrm)) );
12242
12243 if (epartIsReg(modrm)) {
12244 assign( sV, getMMXReg(eregOfRM(modrm)) );
12245 delta += 3+1;
12246 DIP("ph%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
12247 nameMMXReg(gregOfRM(modrm)));
12248 } else {
12249 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12250 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12251 delta += 3+alen;
12252 DIP("ph%s %s,%s\n", str, dis_buf,
12253 nameMMXReg(gregOfRM(modrm)));
12254 }
12255
12256 putMMXReg(
12257 gregOfRM(modrm),
12258 binop(opV64,
12259 binop(opCatE,mkexpr(sV),mkexpr(dV)),
12260 binop(opCatO,mkexpr(sV),mkexpr(dV))
12261 )
12262 );
12263 goto decode_success;
12264 }
12265
12266 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
12267 xmm) and G to G (xmm). */
12268 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
12269 xmm) and G to G (xmm). */
12270 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
12271 G to G (xmm). */
12272 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
12273 G to G (xmm). */
12274 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
12275 G to G (xmm). */
12276 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
12277 G to G (xmm). */
12278
12279 if (sz == 2
12280 && insn[0] == 0x0F && insn[1] == 0x38
12281 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
12282 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
12283 const HChar* str = "???";
12284 IROp opV64 = Iop_INVALID;
12285 IROp opCatO = Iop_CatOddLanes16x4;
12286 IROp opCatE = Iop_CatEvenLanes16x4;
12287 IRTemp sV = newTemp(Ity_V128);
12288 IRTemp dV = newTemp(Ity_V128);
12289 IRTemp sHi = newTemp(Ity_I64);
12290 IRTemp sLo = newTemp(Ity_I64);
12291 IRTemp dHi = newTemp(Ity_I64);
12292 IRTemp dLo = newTemp(Ity_I64);
12293
12294 modrm = insn[3];
12295
12296 switch (insn[2]) {
12297 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
12298 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
12299 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
12300 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
12301 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
12302 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
12303 default: vassert(0);
12304 }
12305 if (insn[2] == 0x02 || insn[2] == 0x06) {
12306 opCatO = Iop_InterleaveHI32x2;
12307 opCatE = Iop_InterleaveLO32x2;
12308 }
12309
12310 assign( dV, getXMMReg(gregOfRM(modrm)) );
12311
12312 if (epartIsReg(modrm)) {
12313 assign( sV, getXMMReg( eregOfRM(modrm)) );
12314 DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
12315 nameXMMReg(gregOfRM(modrm)));
12316 delta += 3+1;
12317 } else {
12318 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12319 gen_SEGV_if_not_16_aligned( addr );
12320 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12321 DIP("ph%s %s,%s\n", str, dis_buf,
12322 nameXMMReg(gregOfRM(modrm)));
12323 delta += 3+alen;
12324 }
12325
12326 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12327 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12328 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12329 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12330
12331 /* This isn't a particularly efficient way to compute the
12332 result, but at least it avoids a proliferation of IROps,
12333 hence avoids complication all the backends. */
12334 putXMMReg(
12335 gregOfRM(modrm),
12336 binop(Iop_64HLtoV128,
12337 binop(opV64,
12338 binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
12339 binop(opCatO,mkexpr(sHi),mkexpr(sLo))
12340 ),
12341 binop(opV64,
12342 binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
12343 binop(opCatO,mkexpr(dHi),mkexpr(dLo))
12344 )
12345 )
12346 );
12347 goto decode_success;
12348 }
12349
12350 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
12351 (MMX) */
12352 if (sz == 4
12353 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
12354 IRTemp sV = newTemp(Ity_I64);
12355 IRTemp dV = newTemp(Ity_I64);
12356
12357 modrm = insn[3];
12358 do_MMX_preamble();
12359 assign( dV, getMMXReg(gregOfRM(modrm)) );
12360
12361 if (epartIsReg(modrm)) {
12362 assign( sV, getMMXReg(eregOfRM(modrm)) );
12363 delta += 3+1;
12364 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
12365 nameMMXReg(gregOfRM(modrm)));
12366 } else {
12367 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12368 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12369 delta += 3+alen;
12370 DIP("pmulhrsw %s,%s\n", dis_buf,
12371 nameMMXReg(gregOfRM(modrm)));
12372 }
12373
12374 putMMXReg(
12375 gregOfRM(modrm),
12376 dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
12377 );
12378 goto decode_success;
12379 }
12380
12381 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
12382 Scale (XMM) */
12383 if (sz == 2
12384 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
12385 IRTemp sV = newTemp(Ity_V128);
12386 IRTemp dV = newTemp(Ity_V128);
12387 IRTemp sHi = newTemp(Ity_I64);
12388 IRTemp sLo = newTemp(Ity_I64);
12389 IRTemp dHi = newTemp(Ity_I64);
12390 IRTemp dLo = newTemp(Ity_I64);
12391
12392 modrm = insn[3];
12393 assign( dV, getXMMReg(gregOfRM(modrm)) );
12394
12395 if (epartIsReg(modrm)) {
12396 assign( sV, getXMMReg(eregOfRM(modrm)) );
12397 delta += 3+1;
12398 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
12399 nameXMMReg(gregOfRM(modrm)));
12400 } else {
12401 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12402 gen_SEGV_if_not_16_aligned( addr );
12403 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12404 delta += 3+alen;
12405 DIP("pmulhrsw %s,%s\n", dis_buf,
12406 nameXMMReg(gregOfRM(modrm)));
12407 }
12408
12409 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12410 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12411 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12412 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12413
12414 putXMMReg(
12415 gregOfRM(modrm),
12416 binop(Iop_64HLtoV128,
12417 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
12418 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
12419 )
12420 );
12421 goto decode_success;
12422 }
12423
12424 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
12425 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
/* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
12427 if (sz == 4
12428 && insn[0] == 0x0F && insn[1] == 0x38
12429 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
12430 IRTemp sV = newTemp(Ity_I64);
12431 IRTemp dV = newTemp(Ity_I64);
12432 const HChar* str = "???";
12433 Int laneszB = 0;
12434
12435 switch (insn[2]) {
12436 case 0x08: laneszB = 1; str = "b"; break;
12437 case 0x09: laneszB = 2; str = "w"; break;
12438 case 0x0A: laneszB = 4; str = "d"; break;
12439 default: vassert(0);
12440 }
12441
12442 modrm = insn[3];
12443 do_MMX_preamble();
12444 assign( dV, getMMXReg(gregOfRM(modrm)) );
12445
12446 if (epartIsReg(modrm)) {
12447 assign( sV, getMMXReg(eregOfRM(modrm)) );
12448 delta += 3+1;
12449 DIP("psign%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
12450 nameMMXReg(gregOfRM(modrm)));
12451 } else {
12452 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12453 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12454 delta += 3+alen;
12455 DIP("psign%s %s,%s\n", str, dis_buf,
12456 nameMMXReg(gregOfRM(modrm)));
12457 }
12458
12459 putMMXReg(
12460 gregOfRM(modrm),
12461 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
12462 );
12463 goto decode_success;
12464 }
12465
12466 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
12467 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
/* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
12469 if (sz == 2
12470 && insn[0] == 0x0F && insn[1] == 0x38
12471 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
12472 IRTemp sV = newTemp(Ity_V128);
12473 IRTemp dV = newTemp(Ity_V128);
12474 IRTemp sHi = newTemp(Ity_I64);
12475 IRTemp sLo = newTemp(Ity_I64);
12476 IRTemp dHi = newTemp(Ity_I64);
12477 IRTemp dLo = newTemp(Ity_I64);
12478 const HChar* str = "???";
12479 Int laneszB = 0;
12480
12481 switch (insn[2]) {
12482 case 0x08: laneszB = 1; str = "b"; break;
12483 case 0x09: laneszB = 2; str = "w"; break;
12484 case 0x0A: laneszB = 4; str = "d"; break;
12485 default: vassert(0);
12486 }
12487
12488 modrm = insn[3];
12489 assign( dV, getXMMReg(gregOfRM(modrm)) );
12490
12491 if (epartIsReg(modrm)) {
12492 assign( sV, getXMMReg(eregOfRM(modrm)) );
12493 delta += 3+1;
12494 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
12495 nameXMMReg(gregOfRM(modrm)));
12496 } else {
12497 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12498 gen_SEGV_if_not_16_aligned( addr );
12499 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12500 delta += 3+alen;
12501 DIP("psign%s %s,%s\n", str, dis_buf,
12502 nameXMMReg(gregOfRM(modrm)));
12503 }
12504
12505 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12506 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12507 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12508 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12509
12510 putXMMReg(
12511 gregOfRM(modrm),
12512 binop(Iop_64HLtoV128,
12513 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
12514 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
12515 )
12516 );
12517 goto decode_success;
12518 }
12519
12520 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
12521 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
12522 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
12523 if (sz == 4
12524 && insn[0] == 0x0F && insn[1] == 0x38
12525 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
12526 IRTemp sV = newTemp(Ity_I64);
12527 const HChar* str = "???";
12528 Int laneszB = 0;
12529
12530 switch (insn[2]) {
12531 case 0x1C: laneszB = 1; str = "b"; break;
12532 case 0x1D: laneszB = 2; str = "w"; break;
12533 case 0x1E: laneszB = 4; str = "d"; break;
12534 default: vassert(0);
12535 }
12536
12537 modrm = insn[3];
12538 do_MMX_preamble();
12539
12540 if (epartIsReg(modrm)) {
12541 assign( sV, getMMXReg(eregOfRM(modrm)) );
12542 delta += 3+1;
12543 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
12544 nameMMXReg(gregOfRM(modrm)));
12545 } else {
12546 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12547 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12548 delta += 3+alen;
12549 DIP("pabs%s %s,%s\n", str, dis_buf,
12550 nameMMXReg(gregOfRM(modrm)));
12551 }
12552
12553 putMMXReg(
12554 gregOfRM(modrm),
12555 dis_PABS_helper( mkexpr(sV), laneszB )
12556 );
12557 goto decode_success;
12558 }
12559
12560 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
12561 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
12562 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
12563 if (sz == 2
12564 && insn[0] == 0x0F && insn[1] == 0x38
12565 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
12566 IRTemp sV = newTemp(Ity_V128);
12567 IRTemp sHi = newTemp(Ity_I64);
12568 IRTemp sLo = newTemp(Ity_I64);
12569 const HChar* str = "???";
12570 Int laneszB = 0;
12571
12572 switch (insn[2]) {
12573 case 0x1C: laneszB = 1; str = "b"; break;
12574 case 0x1D: laneszB = 2; str = "w"; break;
12575 case 0x1E: laneszB = 4; str = "d"; break;
12576 default: vassert(0);
12577 }
12578
12579 modrm = insn[3];
12580
12581 if (epartIsReg(modrm)) {
12582 assign( sV, getXMMReg(eregOfRM(modrm)) );
12583 delta += 3+1;
12584 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
12585 nameXMMReg(gregOfRM(modrm)));
12586 } else {
12587 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12588 gen_SEGV_if_not_16_aligned( addr );
12589 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12590 delta += 3+alen;
12591 DIP("pabs%s %s,%s\n", str, dis_buf,
12592 nameXMMReg(gregOfRM(modrm)));
12593 }
12594
12595 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12596 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12597
12598 putXMMReg(
12599 gregOfRM(modrm),
12600 binop(Iop_64HLtoV128,
12601 dis_PABS_helper( mkexpr(sHi), laneszB ),
12602 dis_PABS_helper( mkexpr(sLo), laneszB )
12603 )
12604 );
12605 goto decode_success;
12606 }
12607
12608 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
12609 if (sz == 4
12610 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
12611 IRTemp sV = newTemp(Ity_I64);
12612 IRTemp dV = newTemp(Ity_I64);
12613 IRTemp res = newTemp(Ity_I64);
12614
12615 modrm = insn[3];
12616 do_MMX_preamble();
12617 assign( dV, getMMXReg(gregOfRM(modrm)) );
12618
12619 if (epartIsReg(modrm)) {
12620 assign( sV, getMMXReg(eregOfRM(modrm)) );
12621 d32 = (UInt)insn[3+1];
12622 delta += 3+1+1;
12623 DIP("palignr $%u,%s,%s\n", d32,
12624 nameMMXReg(eregOfRM(modrm)),
12625 nameMMXReg(gregOfRM(modrm)));
12626 } else {
12627 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12628 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12629 d32 = (UInt)insn[3+alen];
12630 delta += 3+alen+1;
12631 DIP("palignr $%u%s,%s\n", d32,
12632 dis_buf,
12633 nameMMXReg(gregOfRM(modrm)));
12634 }
12635
12636 if (d32 == 0) {
12637 assign( res, mkexpr(sV) );
12638 }
12639 else if (d32 >= 1 && d32 <= 7) {
12640 assign(res,
12641 binop(Iop_Or64,
12642 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d32)),
12643 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d32))
12644 )));
12645 }
12646 else if (d32 == 8) {
12647 assign( res, mkexpr(dV) );
12648 }
12649 else if (d32 >= 9 && d32 <= 15) {
12650 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d32-8))) );
12651 }
12652 else if (d32 >= 16 && d32 <= 255) {
12653 assign( res, mkU64(0) );
12654 }
12655 else
12656 vassert(0);
12657
12658 putMMXReg( gregOfRM(modrm), mkexpr(res) );
12659 goto decode_success;
12660 }
12661
12662 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
12663 if (sz == 2
12664 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
12665 IRTemp sV = newTemp(Ity_V128);
12666 IRTemp dV = newTemp(Ity_V128);
12667 IRTemp sHi = newTemp(Ity_I64);
12668 IRTemp sLo = newTemp(Ity_I64);
12669 IRTemp dHi = newTemp(Ity_I64);
12670 IRTemp dLo = newTemp(Ity_I64);
12671 IRTemp rHi = newTemp(Ity_I64);
12672 IRTemp rLo = newTemp(Ity_I64);
12673
12674 modrm = insn[3];
12675 assign( dV, getXMMReg(gregOfRM(modrm)) );
12676
12677 if (epartIsReg(modrm)) {
12678 assign( sV, getXMMReg(eregOfRM(modrm)) );
12679 d32 = (UInt)insn[3+1];
12680 delta += 3+1+1;
12681 DIP("palignr $%u,%s,%s\n", d32,
12682 nameXMMReg(eregOfRM(modrm)),
12683 nameXMMReg(gregOfRM(modrm)));
12684 } else {
12685 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12686 gen_SEGV_if_not_16_aligned( addr );
12687 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12688 d32 = (UInt)insn[3+alen];
12689 delta += 3+alen+1;
12690 DIP("palignr $%u,%s,%s\n", d32,
12691 dis_buf,
12692 nameXMMReg(gregOfRM(modrm)));
12693 }
12694
12695 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12696 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12697 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12698 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12699
12700 if (d32 == 0) {
12701 assign( rHi, mkexpr(sHi) );
12702 assign( rLo, mkexpr(sLo) );
12703 }
12704 else if (d32 >= 1 && d32 <= 7) {
12705 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d32) );
12706 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d32) );
12707 }
12708 else if (d32 == 8) {
12709 assign( rHi, mkexpr(dLo) );
12710 assign( rLo, mkexpr(sHi) );
12711 }
12712 else if (d32 >= 9 && d32 <= 15) {
12713 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d32-8) );
12714 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d32-8) );
12715 }
12716 else if (d32 == 16) {
12717 assign( rHi, mkexpr(dHi) );
12718 assign( rLo, mkexpr(dLo) );
12719 }
12720 else if (d32 >= 17 && d32 <= 23) {
12721 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-16))) );
12722 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d32-16) );
12723 }
12724 else if (d32 == 24) {
12725 assign( rHi, mkU64(0) );
12726 assign( rLo, mkexpr(dHi) );
12727 }
12728 else if (d32 >= 25 && d32 <= 31) {
12729 assign( rHi, mkU64(0) );
12730 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-24))) );
12731 }
12732 else if (d32 >= 32 && d32 <= 255) {
12733 assign( rHi, mkU64(0) );
12734 assign( rLo, mkU64(0) );
12735 }
12736 else
12737 vassert(0);
12738
12739 putXMMReg(
12740 gregOfRM(modrm),
12741 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
12742 );
12743 goto decode_success;
12744 }
12745
12746 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
12747 if (sz == 4
12748 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
12749 IRTemp sV = newTemp(Ity_I64);
12750 IRTemp dV = newTemp(Ity_I64);
12751
12752 modrm = insn[3];
12753 do_MMX_preamble();
12754 assign( dV, getMMXReg(gregOfRM(modrm)) );
12755
12756 if (epartIsReg(modrm)) {
12757 assign( sV, getMMXReg(eregOfRM(modrm)) );
12758 delta += 3+1;
12759 DIP("pshufb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
12760 nameMMXReg(gregOfRM(modrm)));
12761 } else {
12762 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12763 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12764 delta += 3+alen;
12765 DIP("pshufb %s,%s\n", dis_buf,
12766 nameMMXReg(gregOfRM(modrm)));
12767 }
12768
12769 putMMXReg(
12770 gregOfRM(modrm),
12771 binop(
12772 Iop_And64,
12773 /* permute the lanes */
12774 binop(
12775 Iop_Perm8x8,
12776 mkexpr(dV),
12777 binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
12778 ),
12779 /* mask off lanes which have (index & 0x80) == 0x80 */
12780 unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
12781 )
12782 );
12783 goto decode_success;
12784 }
12785
12786 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
12787 if (sz == 2
12788 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
12789 IRTemp sV = newTemp(Ity_V128);
12790 IRTemp dV = newTemp(Ity_V128);
12791 IRTemp sHi = newTemp(Ity_I64);
12792 IRTemp sLo = newTemp(Ity_I64);
12793 IRTemp dHi = newTemp(Ity_I64);
12794 IRTemp dLo = newTemp(Ity_I64);
12795 IRTemp rHi = newTemp(Ity_I64);
12796 IRTemp rLo = newTemp(Ity_I64);
12797 IRTemp sevens = newTemp(Ity_I64);
12798 IRTemp mask0x80hi = newTemp(Ity_I64);
12799 IRTemp mask0x80lo = newTemp(Ity_I64);
12800 IRTemp maskBit3hi = newTemp(Ity_I64);
12801 IRTemp maskBit3lo = newTemp(Ity_I64);
12802 IRTemp sAnd7hi = newTemp(Ity_I64);
12803 IRTemp sAnd7lo = newTemp(Ity_I64);
12804 IRTemp permdHi = newTemp(Ity_I64);
12805 IRTemp permdLo = newTemp(Ity_I64);
12806
12807 modrm = insn[3];
12808 assign( dV, getXMMReg(gregOfRM(modrm)) );
12809
12810 if (epartIsReg(modrm)) {
12811 assign( sV, getXMMReg(eregOfRM(modrm)) );
12812 delta += 3+1;
12813 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
12814 nameXMMReg(gregOfRM(modrm)));
12815 } else {
12816 addr = disAMode ( &alen, sorb, delta+3, dis_buf );
12817 gen_SEGV_if_not_16_aligned( addr );
12818 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12819 delta += 3+alen;
12820 DIP("pshufb %s,%s\n", dis_buf,
12821 nameXMMReg(gregOfRM(modrm)));
12822 }
12823
12824 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
12825 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
12826 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
12827 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
12828
12829 assign( sevens, mkU64(0x0707070707070707ULL) );
12830
12831 /*
12832 mask0x80hi = Not(SarN8x8(sHi,7))
12833 maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
12834 sAnd7hi = And(sHi,sevens)
12835 permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
12836 And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
12837 rHi = And(permdHi,mask0x80hi)
12838 */
12839 assign(
12840 mask0x80hi,
12841 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));
12842
12843 assign(
12844 maskBit3hi,
12845 binop(Iop_SarN8x8,
12846 binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
12847 mkU8(7)));
12848
12849 assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));
12850
12851 assign(
12852 permdHi,
12853 binop(
12854 Iop_Or64,
12855 binop(Iop_And64,
12856 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
12857 mkexpr(maskBit3hi)),
12858 binop(Iop_And64,
12859 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
12860 unop(Iop_Not64,mkexpr(maskBit3hi))) ));
12861
12862 assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );
12863
12864 /* And the same for the lower half of the result. What fun. */
12865
12866 assign(
12867 mask0x80lo,
12868 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));
12869
12870 assign(
12871 maskBit3lo,
12872 binop(Iop_SarN8x8,
12873 binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
12874 mkU8(7)));
12875
12876 assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));
12877
12878 assign(
12879 permdLo,
12880 binop(
12881 Iop_Or64,
12882 binop(Iop_And64,
12883 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
12884 mkexpr(maskBit3lo)),
12885 binop(Iop_And64,
12886 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
12887 unop(Iop_Not64,mkexpr(maskBit3lo))) ));
12888
12889 assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );
12890
12891 putXMMReg(
12892 gregOfRM(modrm),
12893 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
12894 );
12895 goto decode_success;
12896 }
12897
12898 /* 0F 38 F0 = MOVBE m16/32(E), r16/32(G) */
12899 /* 0F 38 F1 = MOVBE r16/32(G), m16/32(E) */
12900 if ((sz == 2 || sz == 4)
12901 && insn[0] == 0x0F && insn[1] == 0x38
12902 && (insn[2] == 0xF0 || insn[2] == 0xF1)
12903 && !epartIsReg(insn[3])) {
12904
12905 modrm = insn[3];
12906 addr = disAMode(&alen, sorb, delta + 3, dis_buf);
12907 delta += 3 + alen;
12908 ty = szToITy(sz);
12909 IRTemp src = newTemp(ty);
12910
12911 if (insn[2] == 0xF0) { /* LOAD */
12912 assign(src, loadLE(ty, mkexpr(addr)));
12913 IRTemp dst = math_BSWAP(src, ty);
12914 putIReg(sz, gregOfRM(modrm), mkexpr(dst));
12915 DIP("movbe %s,%s\n", dis_buf, nameIReg(sz, gregOfRM(modrm)));
12916 } else { /* STORE */
12917 assign(src, getIReg(sz, gregOfRM(modrm)));
12918 IRTemp dst = math_BSWAP(src, ty);
12919 storeLE(mkexpr(addr), mkexpr(dst));
12920 DIP("movbe %s,%s\n", nameIReg(sz, gregOfRM(modrm)), dis_buf);
12921 }
12922 goto decode_success;
12923 }
12924
12925 /* ---------------------------------------------------- */
12926 /* --- end of the SSSE3 decoder. --- */
12927 /* ---------------------------------------------------- */
12928
12929 /* ---------------------------------------------------- */
12930 /* --- start of the SSE4 decoder --- */
12931 /* ---------------------------------------------------- */
12932
12933 /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
12934 (Partial implementation only -- only deal with cases where
12935 the rounding mode is specified directly by the immediate byte.)
12936 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
12937 (Limitations ditto)
12938 */
12939 if (sz == 2
12940 && insn[0] == 0x0F && insn[1] == 0x3A
12941 && (insn[2] == 0x0B || insn[2] == 0x0A)) {
12942
12943 Bool isD = insn[2] == 0x0B;
12944 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
12945 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
12946 Int imm = 0;
12947
12948 modrm = insn[3];
12949
12950 if (epartIsReg(modrm)) {
12951 assign( src,
12952 isD ? getXMMRegLane64F( eregOfRM(modrm), 0 )
12953 : getXMMRegLane32F( eregOfRM(modrm), 0 ) );
12954 imm = insn[3+1];
12955 if (imm & ~3) goto decode_failure;
12956 delta += 3+1+1;
12957 DIP( "rounds%c $%d,%s,%s\n",
12958 isD ? 'd' : 's',
12959 imm, nameXMMReg( eregOfRM(modrm) ),
12960 nameXMMReg( gregOfRM(modrm) ) );
12961 } else {
12962 addr = disAMode( &alen, sorb, delta+3, dis_buf );
12963 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
12964 imm = insn[3+alen];
12965 if (imm & ~3) goto decode_failure;
12966 delta += 3+alen+1;
12967 DIP( "roundsd $%d,%s,%s\n",
12968 imm, dis_buf, nameXMMReg( gregOfRM(modrm) ) );
12969 }
12970
12971 /* (imm & 3) contains an Intel-encoded rounding mode. Because
12972 that encoding is the same as the encoding for IRRoundingMode,
12973 we can use that value directly in the IR as a rounding
12974 mode. */
12975 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
12976 mkU32(imm & 3), mkexpr(src)) );
12977
12978 if (isD)
12979 putXMMRegLane64F( gregOfRM(modrm), 0, mkexpr(res) );
12980 else
12981 putXMMRegLane32F( gregOfRM(modrm), 0, mkexpr(res) );
12982
12983 goto decode_success;
12984 }
12985
/* F3 0F BD -- LZCNT (count leading zeroes).  An AMD extension,
   which we can only decode if we're sure this is an AMD cpu that
   supports LZCNT, since otherwise it's BSR, which behaves
   differently. */
12990 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xBD
12991 && 0 != (archinfo->hwcaps & VEX_HWCAPS_X86_LZCNT)) {
12992 vassert(sz == 2 || sz == 4);
12993 /*IRType*/ ty = szToITy(sz);
12994 IRTemp src = newTemp(ty);
12995 modrm = insn[3];
12996 if (epartIsReg(modrm)) {
12997 assign(src, getIReg(sz, eregOfRM(modrm)));
12998 delta += 3+1;
12999 DIP("lzcnt%c %s, %s\n", nameISize(sz),
13000 nameIReg(sz, eregOfRM(modrm)),
13001 nameIReg(sz, gregOfRM(modrm)));
13002 } else {
13003 addr = disAMode( &alen, sorb, delta+3, dis_buf );
13004 assign(src, loadLE(ty, mkexpr(addr)));
13005 delta += 3+alen;
13006 DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
13007 nameIReg(sz, gregOfRM(modrm)));
13008 }
13009
13010 IRTemp res = gen_LZCNT(ty, src);
13011 putIReg(sz, gregOfRM(modrm), mkexpr(res));
13012
13013 // Update flags. This is pretty lame .. perhaps can do better
13014 // if this turns out to be performance critical.
13015 // O S A P are cleared. Z is set if RESULT == 0.
13016 // C is set if SRC is zero.
13017 IRTemp src32 = newTemp(Ity_I32);
13018 IRTemp res32 = newTemp(Ity_I32);
13019 assign(src32, widenUto32(mkexpr(src)));
13020 assign(res32, widenUto32(mkexpr(res)));
13021
13022 IRTemp oszacp = newTemp(Ity_I32);
13023 assign(
13024 oszacp,
13025 binop(Iop_Or32,
13026 binop(Iop_Shl32,
13027 unop(Iop_1Uto32,
13028 binop(Iop_CmpEQ32, mkexpr(res32), mkU32(0))),
13029 mkU8(X86G_CC_SHIFT_Z)),
13030 binop(Iop_Shl32,
13031 unop(Iop_1Uto32,
13032 binop(Iop_CmpEQ32, mkexpr(src32), mkU32(0))),
13033 mkU8(X86G_CC_SHIFT_C))
13034 )
13035 );
13036
13037 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
13038 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
13039 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
13040 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
13041
13042 goto decode_success;
13043 }
13044
13045 /* ---------------------------------------------------- */
13046 /* --- end of the SSE4 decoder --- */
13047 /* ---------------------------------------------------- */
13048
13049 after_sse_decoders:
13050
13051 /* ---------------------------------------------------- */
13052 /* --- deal with misc 0x67 pfxs (addr size override) -- */
13053 /* ---------------------------------------------------- */
13054
13055 /* 67 E3 = JCXZ (for JECXZ see below) */
13056 if (insn[0] == 0x67 && insn[1] == 0xE3 && sz == 4) {
13057 delta += 2;
13058 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
13059 delta ++;
13060 stmt( IRStmt_Exit(
13061 binop(Iop_CmpEQ16, getIReg(2,R_ECX), mkU16(0)),
13062 Ijk_Boring,
13063 IRConst_U32(d32),
13064 OFFB_EIP
13065 ));
13066 DIP("jcxz 0x%x\n", d32);
13067 goto decode_success;
13068 }
13069
13070 /* 67 E8 = CALL with redundant addr16 prefix */
13071 if (insn[0] == 0x67 && insn[1] == 0xE8) {
13072 delta++;
13073 }
13074
13075 /* ---------------------------------------------------- */
13076 /* --- start of the baseline insn decoder -- */
13077 /* ---------------------------------------------------- */
13078
13079 /* Get the primary opcode. */
13080 opc = getIByte(delta); delta++;
13081
13082 /* We get here if the current insn isn't SSE, or this CPU doesn't
13083 support SSE. */
13084
13085 switch (opc) {
13086
13087 /* ------------------------ Control flow --------------- */
13088
13089 case 0xC2: /* RET imm16 */
13090 d32 = getUDisp16(delta);
13091 delta += 2;
13092 dis_ret(&dres, d32);
13093 DIP("ret %u\n", d32);
13094 break;
13095 case 0xC3: /* RET */
13096 dis_ret(&dres, 0);
13097 DIP("ret\n");
13098 break;
13099
13100 case 0xCF: /* IRET */
13101 /* Note, this is an extremely kludgey and limited implementation
13102 of iret. All it really does is:
13103 popl %EIP; popl %CS; popl %EFLAGS.
13104 %CS is set but ignored (as it is in, e.g., popw %cs). */
13105 t1 = newTemp(Ity_I32); /* ESP */
13106 t2 = newTemp(Ity_I32); /* new EIP */
13107 t3 = newTemp(Ity_I32); /* new CS */
13108 t4 = newTemp(Ity_I32); /* new EFLAGS */
13109 assign(t1, getIReg(4,R_ESP));
13110 assign(t2, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(0) )));
13111 assign(t3, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(4) )));
13112 assign(t4, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(8) )));
13113 /* Get stuff off stack */
13114 putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(12)));
13115 /* set %CS (which is ignored anyway) */
13116 putSReg( R_CS, unop(Iop_32to16, mkexpr(t3)) );
13117 /* set %EFLAGS */
13118 set_EFLAGS_from_value( t4, False/*!emit_AC_emwarn*/, 0/*unused*/ );
13119 /* goto new EIP value */
13120 jmp_treg(&dres, Ijk_Ret, t2);
13121 vassert(dres.whatNext == Dis_StopHere);
13122 DIP("iret (very kludgey)\n");
13123 break;
13124
13125 case 0xE8: /* CALL J4 */
13126 d32 = getUDisp32(delta); delta += 4;
13127 d32 += (guest_EIP_bbstart+delta);
13128 /* (guest_eip_bbstart+delta) == return-to addr, d32 == call-to addr */
13129 if (d32 == guest_EIP_bbstart+delta && getIByte(delta) >= 0x58
13130 && getIByte(delta) <= 0x5F) {
13131 /* Specially treat the position-independent-code idiom
13132 call X
13133 X: popl %reg
13134 as
13135 movl %eip, %reg.
13136 since this generates better code, but for no other reason. */
13137 Int archReg = getIByte(delta) - 0x58;
13138 /* vex_printf("-- fPIC thingy\n"); */
13139 putIReg(4, archReg, mkU32(guest_EIP_bbstart+delta));
13140 delta++; /* Step over the POP */
13141 DIP("call 0x%x ; popl %s\n",d32,nameIReg(4,archReg));
13142 } else {
13143 /* The normal sequence for a call. */
13144 t1 = newTemp(Ity_I32);
13145 assign(t1, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
13146 putIReg(4, R_ESP, mkexpr(t1));
13147 storeLE( mkexpr(t1), mkU32(guest_EIP_bbstart+delta));
13148 if (resteerOkFn( callback_opaque, (Addr32)d32 )) {
13149 /* follow into the call target. */
13150 dres.whatNext = Dis_ResteerU;
13151 dres.continueAt = (Addr32)d32;
13152 } else {
13153 jmp_lit(&dres, Ijk_Call, d32);
13154 vassert(dres.whatNext == Dis_StopHere);
13155 }
13156 DIP("call 0x%x\n",d32);
13157 }
13158 break;
13159
13160 //-- case 0xC8: /* ENTER */
13161 //-- d32 = getUDisp16(eip); eip += 2;
13162 //-- abyte = getIByte(delta); delta++;
13163 //--
13164 //-- vg_assert(sz == 4);
13165 //-- vg_assert(abyte == 0);
13166 //--
13167 //-- t1 = newTemp(cb); t2 = newTemp(cb);
13168 //-- uInstr2(cb, GET, sz, ArchReg, R_EBP, TempReg, t1);
13169 //-- uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2);
13170 //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
13171 //-- uLiteral(cb, sz);
13172 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
13173 //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
13174 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP);
13175 //-- if (d32) {
13176 //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
13177 //-- uLiteral(cb, d32);
13178 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
13179 //-- }
13180 //-- DIP("enter 0x%x, 0x%x", d32, abyte);
13181 //-- break;
13182
13183 case 0xC9: /* LEAVE */
13184 vassert(sz == 4);
13185 t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32);
13186 assign(t1, getIReg(4,R_EBP));
13187 /* First PUT ESP looks redundant, but need it because ESP must
13188 always be up-to-date for Memcheck to work... */
13189 putIReg(4, R_ESP, mkexpr(t1));
13190 assign(t2, loadLE(Ity_I32,mkexpr(t1)));
13191 putIReg(4, R_EBP, mkexpr(t2));
13192 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(4)) );
13193 DIP("leave\n");
13194 break;
13195
13196 /* ---------------- Misc weird-ass insns --------------- */
13197
13198 case 0x27: /* DAA */
13199 case 0x2F: /* DAS */
13200 case 0x37: /* AAA */
13201 case 0x3F: /* AAS */
13202 /* An ugly implementation for some ugly instructions. Oh
13203 well. */
13204 if (sz != 4) goto decode_failure;
13205 t1 = newTemp(Ity_I32);
13206 t2 = newTemp(Ity_I32);
13207 /* Make up a 32-bit value (t1), with the old value of AX in the
13208 bottom 16 bits, and the old OSZACP bitmask in the upper 16
13209 bits. */
13210 assign(t1,
13211 binop(Iop_16HLto32,
13212 unop(Iop_32to16,
13213 mk_x86g_calculate_eflags_all()),
13214 getIReg(2, R_EAX)
13215 ));
13216 /* Call the helper fn, to get a new AX and OSZACP value, and
13217 poke both back into the guest state. Also pass the helper
13218 the actual opcode so it knows which of the 4 instructions it
13219 is doing the computation for. */
13220 vassert(opc == 0x27 || opc == 0x2F || opc == 0x37 || opc == 0x3F);
13221 assign(t2,
13222 mkIRExprCCall(
13223 Ity_I32, 0/*regparm*/, "x86g_calculate_daa_das_aaa_aas",
13224 &x86g_calculate_daa_das_aaa_aas,
13225 mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) )
13226 ));
13227 putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) ));
13228
13229 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
13230 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
13231 stmt( IRStmt_Put( OFFB_CC_DEP1,
13232 binop(Iop_And32,
13233 binop(Iop_Shr32, mkexpr(t2), mkU8(16)),
13234 mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
13235 | X86G_CC_MASK_A | X86G_CC_MASK_Z
13236 | X86G_CC_MASK_S| X86G_CC_MASK_O )
13237 )
13238 )
13239 );
13240 /* Set NDEP even though it isn't used. This makes redundant-PUT
13241 elimination of previous stores to this field work better. */
13242 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
13243 switch (opc) {
13244 case 0x27: DIP("daa\n"); break;
13245 case 0x2F: DIP("das\n"); break;
13246 case 0x37: DIP("aaa\n"); break;
13247 case 0x3F: DIP("aas\n"); break;
13248 default: vassert(0);
13249 }
13250 break;
13251
13252 case 0xD4: /* AAM */
13253 case 0xD5: /* AAD */
13254 d32 = getIByte(delta); delta++;
13255 if (sz != 4 || d32 != 10) goto decode_failure;
13256 t1 = newTemp(Ity_I32);
13257 t2 = newTemp(Ity_I32);
13258 /* Make up a 32-bit value (t1), with the old value of AX in the
13259 bottom 16 bits, and the old OSZACP bitmask in the upper 16
13260 bits. */
13261 assign(t1,
13262 binop(Iop_16HLto32,
13263 unop(Iop_32to16,
13264 mk_x86g_calculate_eflags_all()),
13265 getIReg(2, R_EAX)
13266 ));
13267 /* Call the helper fn, to get a new AX and OSZACP value, and
13268 poke both back into the guest state. Also pass the helper
13269 the actual opcode so it knows which of the 2 instructions it
13270 is doing the computation for. */
13271 assign(t2,
13272 mkIRExprCCall(
13273 Ity_I32, 0/*regparm*/, "x86g_calculate_aad_aam",
13274 &x86g_calculate_aad_aam,
13275 mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) )
13276 ));
13277 putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) ));
13278
13279 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
13280 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
13281 stmt( IRStmt_Put( OFFB_CC_DEP1,
13282 binop(Iop_And32,
13283 binop(Iop_Shr32, mkexpr(t2), mkU8(16)),
13284 mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
13285 | X86G_CC_MASK_A | X86G_CC_MASK_Z
13286 | X86G_CC_MASK_S| X86G_CC_MASK_O )
13287 )
13288 )
13289 );
13290 /* Set NDEP even though it isn't used. This makes
13291 redundant-PUT elimination of previous stores to this field
13292 work better. */
13293 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
13294
13295 DIP(opc == 0xD4 ? "aam\n" : "aad\n");
13296 break;
13297
13298 /* ------------------------ CWD/CDQ -------------------- */
13299
13300 case 0x98: /* CBW */
13301 if (sz == 4) {
13302 putIReg(4, R_EAX, unop(Iop_16Sto32, getIReg(2, R_EAX)));
13303 DIP("cwde\n");
13304 } else {
13305 vassert(sz == 2);
13306 putIReg(2, R_EAX, unop(Iop_8Sto16, getIReg(1, R_EAX)));
13307 DIP("cbw\n");
13308 }
13309 break;
13310
13311 case 0x99: /* CWD/CDQ */
13312 ty = szToITy(sz);
13313 putIReg(sz, R_EDX,
13314 binop(mkSizedOp(ty,Iop_Sar8),
13315 getIReg(sz, R_EAX),
13316 mkU8(sz == 2 ? 15 : 31)) );
13317 DIP(sz == 2 ? "cwdq\n" : "cdqq\n");
13318 break;
13319
13320 /* ------------------------ FPU ops -------------------- */
13321
13322 case 0x9E: /* SAHF */
13323 codegen_SAHF();
13324 DIP("sahf\n");
13325 break;
13326
13327 case 0x9F: /* LAHF */
13328 codegen_LAHF();
13329 DIP("lahf\n");
13330 break;
13331
13332 case 0x9B: /* FWAIT */
13333 /* ignore? */
13334 DIP("fwait\n");
13335 break;
13336
13337 case 0xD8:
13338 case 0xD9:
13339 case 0xDA:
13340 case 0xDB:
13341 case 0xDC:
13342 case 0xDD:
13343 case 0xDE:
13344 case 0xDF: {
13345 Int delta0 = delta;
13346 Bool decode_OK = False;
13347 delta = dis_FPU ( &decode_OK, sorb, delta );
13348 if (!decode_OK) {
13349 delta = delta0;
13350 goto decode_failure;
13351 }
13352 break;
13353 }
13354
13355 /* ------------------------ INC & DEC ------------------ */
13356
13357 case 0x40: /* INC eAX */
13358 case 0x41: /* INC eCX */
13359 case 0x42: /* INC eDX */
13360 case 0x43: /* INC eBX */
13361 case 0x44: /* INC eSP */
13362 case 0x45: /* INC eBP */
13363 case 0x46: /* INC eSI */
13364 case 0x47: /* INC eDI */
13365 vassert(sz == 2 || sz == 4);
13366 ty = szToITy(sz);
13367 t1 = newTemp(ty);
13368 assign( t1, binop(mkSizedOp(ty,Iop_Add8),
13369 getIReg(sz, (UInt)(opc - 0x40)),
13370 mkU(ty,1)) );
13371 setFlags_INC_DEC( True, t1, ty );
13372 putIReg(sz, (UInt)(opc - 0x40), mkexpr(t1));
13373 DIP("inc%c %s\n", nameISize(sz), nameIReg(sz,opc-0x40));
13374 break;
13375
13376 case 0x48: /* DEC eAX */
13377 case 0x49: /* DEC eCX */
13378 case 0x4A: /* DEC eDX */
13379 case 0x4B: /* DEC eBX */
13380 case 0x4C: /* DEC eSP */
13381 case 0x4D: /* DEC eBP */
13382 case 0x4E: /* DEC eSI */
13383 case 0x4F: /* DEC eDI */
13384 vassert(sz == 2 || sz == 4);
13385 ty = szToITy(sz);
13386 t1 = newTemp(ty);
13387 assign( t1, binop(mkSizedOp(ty,Iop_Sub8),
13388 getIReg(sz, (UInt)(opc - 0x48)),
13389 mkU(ty,1)) );
13390 setFlags_INC_DEC( False, t1, ty );
13391 putIReg(sz, (UInt)(opc - 0x48), mkexpr(t1));
13392 DIP("dec%c %s\n", nameISize(sz), nameIReg(sz,opc-0x48));
13393 break;
13394
13395 /* ------------------------ INT ------------------------ */
13396
13397 case 0xCC: /* INT 3 */
13398 jmp_lit(&dres, Ijk_SigTRAP, ((Addr32)guest_EIP_bbstart)+delta);
13399 vassert(dres.whatNext == Dis_StopHere);
13400 DIP("int $0x3\n");
13401 break;
13402
13403 case 0xCD: /* INT imm8 */
13404 d32 = getIByte(delta); delta++;
13405
13406 /* For any of the cases where we emit a jump (that is, for all
13407 currently handled cases), it's important that all ArchRegs
13408 carry their up-to-date value at this point. So we declare an
13409 end-of-block here, which forces any TempRegs caching ArchRegs
13410 to be flushed. */
13411
13412 /* Handle int $0x3F .. $0x4F by synthesising a segfault and a
13413 restart of this instruction (hence the "-2" two lines below,
13414 to get the restart EIP to be this instruction). This is
13415 probably Linux-specific and it would be more correct to only
13416 do this if the VexAbiInfo says that is what we should do.
13417 This used to handle just 0x40-0x43; Jikes RVM uses a larger
13418 range (0x3F-0x49), and this allows some slack as well. */
13419 if (d32 >= 0x3F && d32 <= 0x4F) {
13420 jmp_lit(&dres, Ijk_SigSEGV, ((Addr32)guest_EIP_bbstart)+delta-2);
13421 vassert(dres.whatNext == Dis_StopHere);
13422 DIP("int $0x%x\n", d32);
13423 break;
13424 }
13425
13426 /* Handle int $0x80 (linux syscalls), int $0x81 and $0x82
13427 (darwin syscalls), int $0x91 (Solaris syscalls) and int $0xD2
13428 (Solaris fasttrap syscalls). As part of this, note where we are, so we
13429 can back up the guest to this point if the syscall needs to
13430 be restarted. */
13431 IRJumpKind jump_kind;
13432 switch (d32) {
13433 case 0x80:
13434 jump_kind = Ijk_Sys_int128;
13435 break;
13436 case 0x81:
13437 jump_kind = Ijk_Sys_int129;
13438 break;
13439 case 0x82:
13440 jump_kind = Ijk_Sys_int130;
13441 break;
13442 case 0x91:
13443 jump_kind = Ijk_Sys_int145;
13444 break;
13445 case 0xD2:
13446 jump_kind = Ijk_Sys_int210;
13447 break;
13448 default:
13449 /* none of the above */
13450 goto decode_failure;
13451 }
13452
13453 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
13454 mkU32(guest_EIP_curr_instr) ) );
13455 jmp_lit(&dres, jump_kind, ((Addr32)guest_EIP_bbstart)+delta);
13456 vassert(dres.whatNext == Dis_StopHere);
13457 DIP("int $0x%x\n", d32);
13458 break;
13459
13460 /* ------------------------ Jcond, byte offset --------- */
13461
13462 case 0xEB: /* Jb (jump, byte offset) */
13463 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
13464 delta++;
13465 if (resteerOkFn( callback_opaque, (Addr32)d32) ) {
13466 dres.whatNext = Dis_ResteerU;
13467 dres.continueAt = (Addr32)d32;
13468 } else {
13469 jmp_lit(&dres, Ijk_Boring, d32);
13470 vassert(dres.whatNext == Dis_StopHere);
13471 }
13472 DIP("jmp-8 0x%x\n", d32);
13473 break;
13474
13475 case 0xE9: /* Jv (jump, 16/32 offset) */
13476 vassert(sz == 4); /* JRS added 2004 July 11 */
13477 d32 = (((Addr32)guest_EIP_bbstart)+delta+sz) + getSDisp(sz,delta);
13478 delta += sz;
13479 if (resteerOkFn( callback_opaque, (Addr32)d32) ) {
13480 dres.whatNext = Dis_ResteerU;
13481 dres.continueAt = (Addr32)d32;
13482 } else {
13483 jmp_lit(&dres, Ijk_Boring, d32);
13484 vassert(dres.whatNext == Dis_StopHere);
13485 }
13486 DIP("jmp 0x%x\n", d32);
13487 break;
13488
13489 case 0x70: /* JOb (jump overflow) */
13490 case 0x71: /* JNOb (jump no overflow) */
13491 case 0x72: /* JBb/JNAEb (jump below) */
13492 case 0x73: /* JNBb/JAEb (jump not below) */
13493 case 0x74: /* JZb/JEb (jump zero) */
13494 case 0x75: /* JNZb/JNEb (jump not zero) */
13495 case 0x76: /* JBEb/JNAb (jump below or equal) */
13496 case 0x77: /* JNBEb/JAb (jump not below or equal) */
13497 case 0x78: /* JSb (jump negative) */
13498 case 0x79: /* JNSb (jump not negative) */
13499 case 0x7A: /* JP (jump parity even) */
13500 case 0x7B: /* JNP/JPO (jump parity odd) */
13501 case 0x7C: /* JLb/JNGEb (jump less) */
13502 case 0x7D: /* JGEb/JNLb (jump greater or equal) */
13503 case 0x7E: /* JLEb/JNGb (jump less or equal) */
13504 case 0x7F: /* JGb/JNLEb (jump greater) */
13505 { Int jmpDelta;
13506 const HChar* comment = "";
13507 jmpDelta = (Int)getSDisp8(delta);
13508 vassert(-128 <= jmpDelta && jmpDelta < 128);
13509 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + jmpDelta;
13510 delta++;
13511 if (resteerCisOk
13512 && vex_control.guest_chase_cond
13513 && (Addr32)d32 != (Addr32)guest_EIP_bbstart
13514 && jmpDelta < 0
13515 && resteerOkFn( callback_opaque, (Addr32)d32) ) {
13516 /* Speculation: assume this backward branch is taken. So we
13517 need to emit a side-exit to the insn following this one,
13518 on the negation of the condition, and continue at the
13519 branch target address (d32). If we wind up back at the
13520 first instruction of the trace, just stop; it's better to
13521 let the IR loop unroller handle that case. */
13522 stmt( IRStmt_Exit(
13523 mk_x86g_calculate_condition((X86Condcode)(1 ^ (opc - 0x70))),
13524 Ijk_Boring,
13525 IRConst_U32(guest_EIP_bbstart+delta),
13526 OFFB_EIP ) );
13527 dres.whatNext = Dis_ResteerC;
13528 dres.continueAt = (Addr32)d32;
13529 comment = "(assumed taken)";
13530 }
13531 else
13532 if (resteerCisOk
13533 && vex_control.guest_chase_cond
13534 && (Addr32)d32 != (Addr32)guest_EIP_bbstart
13535 && jmpDelta >= 0
13536 && resteerOkFn( callback_opaque,
13537 (Addr32)(guest_EIP_bbstart+delta)) ) {
13538 /* Speculation: assume this forward branch is not taken. So
13539 we need to emit a side-exit to d32 (the dest) and continue
13540 disassembling at the insn immediately following this
13541 one. */
13542 stmt( IRStmt_Exit(
13543 mk_x86g_calculate_condition((X86Condcode)(opc - 0x70)),
13544 Ijk_Boring,
13545 IRConst_U32(d32),
13546 OFFB_EIP ) );
13547 dres.whatNext = Dis_ResteerC;
13548 dres.continueAt = guest_EIP_bbstart + delta;
13549 comment = "(assumed not taken)";
13550 }
13551 else {
13552 /* Conservative default translation - end the block at this
13553 point. */
13554 jcc_01( &dres, (X86Condcode)(opc - 0x70),
13555 (Addr32)(guest_EIP_bbstart+delta), d32);
13556 vassert(dres.whatNext == Dis_StopHere);
13557 }
13558 DIP("j%s-8 0x%x %s\n", name_X86Condcode(opc - 0x70), d32, comment);
13559 break;
13560 }
13561
13562 case 0xE3: /* JECXZ (for JCXZ see above) */
13563 if (sz != 4) goto decode_failure;
13564 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
13565 delta ++;
13566 stmt( IRStmt_Exit(
13567 binop(Iop_CmpEQ32, getIReg(4,R_ECX), mkU32(0)),
13568 Ijk_Boring,
13569 IRConst_U32(d32),
13570 OFFB_EIP
13571 ));
13572 DIP("jecxz 0x%x\n", d32);
13573 break;
13574
13575 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
13576 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */
13577 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */
13578 { /* Again, the docs say this uses ECX/CX as a count depending on
13579 the address size override, not the operand one. Since we
13580 don't handle address size overrides, I guess that means
13581 ECX. */
13582 IRExpr* zbit = NULL;
13583 IRExpr* count = NULL;
13584 IRExpr* cond = NULL;
13585 const HChar* xtra = NULL;
13586
13587 if (sz != 4) goto decode_failure;
13588 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
13589 delta++;
13590 putIReg(4, R_ECX, binop(Iop_Sub32, getIReg(4,R_ECX), mkU32(1)));
13591
13592 count = getIReg(4,R_ECX);
13593 cond = binop(Iop_CmpNE32, count, mkU32(0));
13594 switch (opc) {
13595 case 0xE2:
13596 xtra = "";
13597 break;
13598 case 0xE1:
13599 xtra = "e";
13600 zbit = mk_x86g_calculate_condition( X86CondZ );
13601 cond = mkAnd1(cond, zbit);
13602 break;
13603 case 0xE0:
13604 xtra = "ne";
13605 zbit = mk_x86g_calculate_condition( X86CondNZ );
13606 cond = mkAnd1(cond, zbit);
13607 break;
13608 default:
13609 vassert(0);
13610 }
13611 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U32(d32), OFFB_EIP) );
13612
13613 DIP("loop%s 0x%x\n", xtra, d32);
13614 break;
13615 }
13616
13617 /* ------------------------ IMUL ----------------------- */
13618
13619 case 0x69: /* IMUL Iv, Ev, Gv */
13620 delta = dis_imul_I_E_G ( sorb, sz, delta, sz );
13621 break;
13622 case 0x6B: /* IMUL Ib, Ev, Gv */
13623 delta = dis_imul_I_E_G ( sorb, sz, delta, 1 );
13624 break;
13625
13626 /* ------------------------ MOV ------------------------ */
13627
13628 case 0x88: /* MOV Gb,Eb */
13629 delta = dis_mov_G_E(sorb, 1, delta);
13630 break;
13631
13632 case 0x89: /* MOV Gv,Ev */
13633 delta = dis_mov_G_E(sorb, sz, delta);
13634 break;
13635
13636 case 0x8A: /* MOV Eb,Gb */
13637 delta = dis_mov_E_G(sorb, 1, delta);
13638 break;
13639
13640 case 0x8B: /* MOV Ev,Gv */
13641 delta = dis_mov_E_G(sorb, sz, delta);
13642 break;
13643
13644 case 0x8D: /* LEA M,Gv */
13645 if (sz != 4)
13646 goto decode_failure;
13647 modrm = getIByte(delta);
13648 if (epartIsReg(modrm))
13649 goto decode_failure;
13650 /* NOTE! this is the one place where a segment override prefix
13651 has no effect on the address calculation. Therefore we pass
13652 zero instead of sorb here. */
13653 addr = disAMode ( &alen, /*sorb*/ 0, delta, dis_buf );
13654 delta += alen;
13655 putIReg(sz, gregOfRM(modrm), mkexpr(addr));
13656 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
13657 nameIReg(sz,gregOfRM(modrm)));
13658 break;
13659
13660 case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */
13661 delta = dis_mov_Sw_Ew(sorb, sz, delta);
13662 break;
13663
13664 case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */
13665 delta = dis_mov_Ew_Sw(sorb, delta);
13666 break;
13667
13668 case 0xA0: /* MOV Ob,AL */
13669 sz = 1;
13670 /* Fall through ... */
13671 case 0xA1: /* MOV Ov,eAX */
13672 d32 = getUDisp32(delta); delta += 4;
13673 ty = szToITy(sz);
13674 addr = newTemp(Ity_I32);
13675 assign( addr, handleSegOverride(sorb, mkU32(d32)) );
13676 putIReg(sz, R_EAX, loadLE(ty, mkexpr(addr)));
13677 DIP("mov%c %s0x%x, %s\n", nameISize(sz), sorbTxt(sorb),
13678 d32, nameIReg(sz,R_EAX));
13679 break;
13680
13681 case 0xA2: /* MOV AL,Ob */
13682 sz = 1;
13683 /* Fall through ... */
13684 case 0xA3: /* MOV eAX,Ov */
13685 d32 = getUDisp32(delta); delta += 4;
13686 ty = szToITy(sz);
13687 addr = newTemp(Ity_I32);
13688 assign( addr, handleSegOverride(sorb, mkU32(d32)) );
13689 storeLE( mkexpr(addr), getIReg(sz,R_EAX) );
13690 DIP("mov%c %s, %s0x%x\n", nameISize(sz), nameIReg(sz,R_EAX),
13691 sorbTxt(sorb), d32);
13692 break;
13693
13694 case 0xB0: /* MOV imm,AL */
13695 case 0xB1: /* MOV imm,CL */
13696 case 0xB2: /* MOV imm,DL */
13697 case 0xB3: /* MOV imm,BL */
13698 case 0xB4: /* MOV imm,AH */
13699 case 0xB5: /* MOV imm,CH */
13700 case 0xB6: /* MOV imm,DH */
13701 case 0xB7: /* MOV imm,BH */
13702 d32 = getIByte(delta); delta += 1;
13703 putIReg(1, opc-0xB0, mkU8(d32));
13704 DIP("movb $0x%x,%s\n", d32, nameIReg(1,opc-0xB0));
13705 break;
13706
13707 case 0xB8: /* MOV imm,eAX */
13708 case 0xB9: /* MOV imm,eCX */
13709 case 0xBA: /* MOV imm,eDX */
13710 case 0xBB: /* MOV imm,eBX */
13711 case 0xBC: /* MOV imm,eSP */
13712 case 0xBD: /* MOV imm,eBP */
13713 case 0xBE: /* MOV imm,eSI */
13714 case 0xBF: /* MOV imm,eDI */
13715 d32 = getUDisp(sz,delta); delta += sz;
13716 putIReg(sz, opc-0xB8, mkU(szToITy(sz), d32));
13717 DIP("mov%c $0x%x,%s\n", nameISize(sz), d32, nameIReg(sz,opc-0xB8));
13718 break;
13719
13720 case 0xC6: /* C6 /0 = MOV Ib,Eb */
13721 sz = 1;
13722 goto maybe_do_Mov_I_E;
13723 case 0xC7: /* C7 /0 = MOV Iv,Ev */
13724 goto maybe_do_Mov_I_E;
13725
13726 maybe_do_Mov_I_E:
13727 modrm = getIByte(delta);
13728 if (gregOfRM(modrm) == 0) {
13729 if (epartIsReg(modrm)) {
13730 delta++; /* mod/rm byte */
13731 d32 = getUDisp(sz,delta); delta += sz;
13732 putIReg(sz, eregOfRM(modrm), mkU(szToITy(sz), d32));
13733 DIP("mov%c $0x%x, %s\n", nameISize(sz), d32,
13734 nameIReg(sz,eregOfRM(modrm)));
13735 } else {
13736 addr = disAMode ( &alen, sorb, delta, dis_buf );
13737 delta += alen;
13738 d32 = getUDisp(sz,delta); delta += sz;
13739 storeLE(mkexpr(addr), mkU(szToITy(sz), d32));
13740 DIP("mov%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
13741 }
13742 break;
13743 }
13744 goto decode_failure;
13745
13746 /* ------------------------ opl imm, A ----------------- */
13747
13748 case 0x04: /* ADD Ib, AL */
13749 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
13750 break;
13751 case 0x05: /* ADD Iv, eAX */
13752 delta = dis_op_imm_A( sz, False, Iop_Add8, True, delta, "add" );
13753 break;
13754
13755 case 0x0C: /* OR Ib, AL */
13756 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
13757 break;
13758 case 0x0D: /* OR Iv, eAX */
13759 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
13760 break;
13761
13762 case 0x14: /* ADC Ib, AL */
13763 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
13764 break;
13765 case 0x15: /* ADC Iv, eAX */
13766 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" );
13767 break;
13768
13769 case 0x1C: /* SBB Ib, AL */
13770 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
13771 break;
13772 case 0x1D: /* SBB Iv, eAX */
13773 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" );
13774 break;
13775
13776 case 0x24: /* AND Ib, AL */
13777 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
13778 break;
13779 case 0x25: /* AND Iv, eAX */
13780 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
13781 break;
13782
13783 case 0x2C: /* SUB Ib, AL */
13784 delta = dis_op_imm_A( 1, False, Iop_Sub8, True, delta, "sub" );
13785 break;
13786 case 0x2D: /* SUB Iv, eAX */
13787 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
13788 break;
13789
13790 case 0x34: /* XOR Ib, AL */
13791 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
13792 break;
13793 case 0x35: /* XOR Iv, eAX */
13794 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
13795 break;
13796
13797 case 0x3C: /* CMP Ib, AL */
13798 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
13799 break;
13800 case 0x3D: /* CMP Iv, eAX */
13801 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
13802 break;
13803
13804 case 0xA8: /* TEST Ib, AL */
13805 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
13806 break;
13807 case 0xA9: /* TEST Iv, eAX */
13808 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
13809 break;
13810
13811 /* ------------------------ opl Ev, Gv ----------------- */
13812
13813 case 0x02: /* ADD Eb,Gb */
13814 delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, 1, delta, "add" );
13815 break;
13816 case 0x03: /* ADD Ev,Gv */
13817 delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, sz, delta, "add" );
13818 break;
13819
13820 case 0x0A: /* OR Eb,Gb */
13821 delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, 1, delta, "or" );
13822 break;
13823 case 0x0B: /* OR Ev,Gv */
13824 delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, sz, delta, "or" );
13825 break;
13826
13827 case 0x12: /* ADC Eb,Gb */
13828 delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, 1, delta, "adc" );
13829 break;
13830 case 0x13: /* ADC Ev,Gv */
13831 delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, sz, delta, "adc" );
13832 break;
13833
13834 case 0x1A: /* SBB Eb,Gb */
13835 delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, 1, delta, "sbb" );
13836 break;
13837 case 0x1B: /* SBB Ev,Gv */
13838 delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, sz, delta, "sbb" );
13839 break;
13840
13841 case 0x22: /* AND Eb,Gb */
13842 delta = dis_op2_E_G ( sorb, False, Iop_And8, True, 1, delta, "and" );
13843 break;
13844 case 0x23: /* AND Ev,Gv */
13845 delta = dis_op2_E_G ( sorb, False, Iop_And8, True, sz, delta, "and" );
13846 break;
13847
13848 case 0x2A: /* SUB Eb,Gb */
13849 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, 1, delta, "sub" );
13850 break;
13851 case 0x2B: /* SUB Ev,Gv */
13852 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, sz, delta, "sub" );
13853 break;
13854
13855 case 0x32: /* XOR Eb,Gb */
13856 delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, 1, delta, "xor" );
13857 break;
13858 case 0x33: /* XOR Ev,Gv */
13859 delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, sz, delta, "xor" );
13860 break;
13861
13862 case 0x3A: /* CMP Eb,Gb */
13863 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, 1, delta, "cmp" );
13864 break;
13865 case 0x3B: /* CMP Ev,Gv */
13866 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, sz, delta, "cmp" );
13867 break;
13868
13869 case 0x84: /* TEST Eb,Gb */
13870 delta = dis_op2_E_G ( sorb, False, Iop_And8, False, 1, delta, "test" );
13871 break;
13872 case 0x85: /* TEST Ev,Gv */
13873 delta = dis_op2_E_G ( sorb, False, Iop_And8, False, sz, delta, "test" );
13874 break;
13875
13876 /* ------------------------ opl Gv, Ev ----------------- */
13877
13878 case 0x00: /* ADD Gb,Eb */
13879 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13880 Iop_Add8, True, 1, delta, "add" );
13881 break;
13882 case 0x01: /* ADD Gv,Ev */
13883 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13884 Iop_Add8, True, sz, delta, "add" );
13885 break;
13886
13887 case 0x08: /* OR Gb,Eb */
13888 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13889 Iop_Or8, True, 1, delta, "or" );
13890 break;
13891 case 0x09: /* OR Gv,Ev */
13892 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13893 Iop_Or8, True, sz, delta, "or" );
13894 break;
13895
13896 case 0x10: /* ADC Gb,Eb */
13897 delta = dis_op2_G_E ( sorb, pfx_lock, True,
13898 Iop_Add8, True, 1, delta, "adc" );
13899 break;
13900 case 0x11: /* ADC Gv,Ev */
13901 delta = dis_op2_G_E ( sorb, pfx_lock, True,
13902 Iop_Add8, True, sz, delta, "adc" );
13903 break;
13904
13905 case 0x18: /* SBB Gb,Eb */
13906 delta = dis_op2_G_E ( sorb, pfx_lock, True,
13907 Iop_Sub8, True, 1, delta, "sbb" );
13908 break;
13909 case 0x19: /* SBB Gv,Ev */
13910 delta = dis_op2_G_E ( sorb, pfx_lock, True,
13911 Iop_Sub8, True, sz, delta, "sbb" );
13912 break;
13913
13914 case 0x20: /* AND Gb,Eb */
13915 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13916 Iop_And8, True, 1, delta, "and" );
13917 break;
13918 case 0x21: /* AND Gv,Ev */
13919 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13920 Iop_And8, True, sz, delta, "and" );
13921 break;
13922
13923 case 0x28: /* SUB Gb,Eb */
13924 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13925 Iop_Sub8, True, 1, delta, "sub" );
13926 break;
13927 case 0x29: /* SUB Gv,Ev */
13928 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13929 Iop_Sub8, True, sz, delta, "sub" );
13930 break;
13931
13932 case 0x30: /* XOR Gb,Eb */
13933 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13934 Iop_Xor8, True, 1, delta, "xor" );
13935 break;
13936 case 0x31: /* XOR Gv,Ev */
13937 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13938 Iop_Xor8, True, sz, delta, "xor" );
13939 break;
13940
13941 case 0x38: /* CMP Gb,Eb */
13942 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13943 Iop_Sub8, False, 1, delta, "cmp" );
13944 break;
13945 case 0x39: /* CMP Gv,Ev */
13946 delta = dis_op2_G_E ( sorb, pfx_lock, False,
13947 Iop_Sub8, False, sz, delta, "cmp" );
13948 break;
13949
13950 /* ------------------------ POP ------------------------ */
13951
13952 case 0x58: /* POP eAX */
13953 case 0x59: /* POP eCX */
13954 case 0x5A: /* POP eDX */
13955 case 0x5B: /* POP eBX */
13956 case 0x5D: /* POP eBP */
13957 case 0x5E: /* POP eSI */
13958 case 0x5F: /* POP eDI */
13959 case 0x5C: /* POP eSP */
13960 vassert(sz == 2 || sz == 4);
13961 t1 = newTemp(szToITy(sz)); t2 = newTemp(Ity_I32);
13962 assign(t2, getIReg(4, R_ESP));
13963 assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
13964 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz)));
13965 putIReg(sz, opc-0x58, mkexpr(t1));
13966 DIP("pop%c %s\n", nameISize(sz), nameIReg(sz,opc-0x58));
13967 break;
13968
13969 case 0x9D: /* POPF */
13970 vassert(sz == 2 || sz == 4);
13971 t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32);
13972 assign(t2, getIReg(4, R_ESP));
13973 assign(t1, widenUto32(loadLE(szToITy(sz),mkexpr(t2))));
13974 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz)));
13975
13976 /* Generate IR to set %EFLAGS{O,S,Z,A,C,P,D,ID,AC} from the
13977 value in t1. */
13978 set_EFLAGS_from_value( t1, True/*emit_AC_emwarn*/,
13979 ((Addr32)guest_EIP_bbstart)+delta );
13980
13981 DIP("popf%c\n", nameISize(sz));
13982 break;
13983
13984 case 0x61: /* POPA */
13985 /* This is almost certainly wrong for sz==2. So ... */
13986 if (sz != 4) goto decode_failure;
13987
13988 /* t5 is the old %ESP value. */
13989 t5 = newTemp(Ity_I32);
13990 assign( t5, getIReg(4, R_ESP) );
13991
13992 /* Reload all the registers, except %esp. */
13993 putIReg(4,R_EAX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(28)) ));
13994 putIReg(4,R_ECX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(24)) ));
13995 putIReg(4,R_EDX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(20)) ));
13996 putIReg(4,R_EBX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(16)) ));
13997 /* ignore saved %ESP */
13998 putIReg(4,R_EBP, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 8)) ));
13999 putIReg(4,R_ESI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 4)) ));
14000 putIReg(4,R_EDI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 0)) ));
14001
14002 /* and move %ESP back up */
14003 putIReg( 4, R_ESP, binop(Iop_Add32, mkexpr(t5), mkU32(8*4)) );
14004
14005 DIP("popa%c\n", nameISize(sz));
14006 break;
14007
14008 case 0x8F: /* POPL/POPW m32 */
14009 { Int len;
14010 UChar rm = getIByte(delta);
14011
14012 /* make sure this instruction is correct POP */
14013 if (epartIsReg(rm) || gregOfRM(rm) != 0)
14014 goto decode_failure;
14015 /* and has correct size */
14016 if (sz != 4 && sz != 2)
14017 goto decode_failure;
14018 ty = szToITy(sz);
14019
14020 t1 = newTemp(Ity_I32); /* stack address */
14021 t3 = newTemp(ty); /* data */
14022 /* set t1 to ESP: t1 = ESP */
14023 assign( t1, getIReg(4, R_ESP) );
14024 /* load M[ESP] to virtual register t3: t3 = M[t1] */
14025 assign( t3, loadLE(ty, mkexpr(t1)) );
14026
14027 /* increase ESP; must be done before the STORE. Intel manual says:
14028 If the ESP register is used as a base register for addressing
14029 a destination operand in memory, the POP instruction computes
14030 the effective address of the operand after it increments the
14031 ESP register.
14032 */
14033 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(sz)) );
14034
14035 /* resolve MODR/M */
14036 addr = disAMode ( &len, sorb, delta, dis_buf);
14037 storeLE( mkexpr(addr), mkexpr(t3) );
14038
14039 DIP("pop%c %s\n", sz==2 ? 'w' : 'l', dis_buf);
14040
14041 delta += len;
14042 break;
14043 }
14044
14045 case 0x1F: /* POP %DS */
14046 dis_pop_segreg( R_DS, sz ); break;
14047 case 0x07: /* POP %ES */
14048 dis_pop_segreg( R_ES, sz ); break;
14049 case 0x17: /* POP %SS */
14050 dis_pop_segreg( R_SS, sz ); break;
14051
14052 /* ------------------------ PUSH ----------------------- */
14053
14054 case 0x50: /* PUSH eAX */
14055 case 0x51: /* PUSH eCX */
14056 case 0x52: /* PUSH eDX */
14057 case 0x53: /* PUSH eBX */
14058 case 0x55: /* PUSH eBP */
14059 case 0x56: /* PUSH eSI */
14060 case 0x57: /* PUSH eDI */
14061 case 0x54: /* PUSH eSP */
14062 /* This is the Right Way, in that the value to be pushed is
14063 established before %esp is changed, so that pushl %esp
14064 correctly pushes the old value. */
14065 vassert(sz == 2 || sz == 4);
14066 ty = sz==2 ? Ity_I16 : Ity_I32;
14067 t1 = newTemp(ty); t2 = newTemp(Ity_I32);
14068 assign(t1, getIReg(sz, opc-0x50));
14069 assign(t2, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)));
14070 putIReg(4, R_ESP, mkexpr(t2) );
14071 storeLE(mkexpr(t2),mkexpr(t1));
14072 DIP("push%c %s\n", nameISize(sz), nameIReg(sz,opc-0x50));
14073 break;
14074
14075
14076 case 0x68: /* PUSH Iv */
14077 d32 = getUDisp(sz,delta); delta += sz;
14078 goto do_push_I;
14079 case 0x6A: /* PUSH Ib, sign-extended to sz */
14080 d32 = getSDisp8(delta); delta += 1;
14081 goto do_push_I;
14082 do_push_I:
14083 ty = szToITy(sz);
14084 t1 = newTemp(Ity_I32); t2 = newTemp(ty);
14085 assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
14086 putIReg(4, R_ESP, mkexpr(t1) );
14087 /* stop mkU16 asserting if d32 is a negative 16-bit number
14088 (bug #132813) */
14089 if (ty == Ity_I16)
14090 d32 &= 0xFFFF;
14091 storeLE( mkexpr(t1), mkU(ty,d32) );
14092 DIP("push%c $0x%x\n", nameISize(sz), d32);
14093 break;
14094
14095 case 0x9C: /* PUSHF */ {
14096 vassert(sz == 2 || sz == 4);
14097
14098 t1 = newTemp(Ity_I32);
14099 assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
14100 putIReg(4, R_ESP, mkexpr(t1) );
14101
14102 /* Calculate OSZACP, and patch in fixed fields as per
14103 Intel docs.
14104 - bit 1 is always 1
14105 - bit 9 is Interrupt Enable (should always be 1 in user mode?)
14106 */
14107 t2 = newTemp(Ity_I32);
14108 assign( t2, binop(Iop_Or32,
14109 mk_x86g_calculate_eflags_all(),
14110 mkU32( (1<<1)|(1<<9) ) ));
14111
14112 /* Patch in the D flag. This can simply be a copy of bit 10 of
14113 baseBlock[OFFB_DFLAG]. */
14114 t3 = newTemp(Ity_I32);
14115 assign( t3, binop(Iop_Or32,
14116 mkexpr(t2),
14117 binop(Iop_And32,
14118 IRExpr_Get(OFFB_DFLAG,Ity_I32),
14119 mkU32(1<<10)))
14120 );
14121
14122 /* And patch in the ID flag. */
14123 t4 = newTemp(Ity_I32);
14124 assign( t4, binop(Iop_Or32,
14125 mkexpr(t3),
14126 binop(Iop_And32,
14127 binop(Iop_Shl32, IRExpr_Get(OFFB_IDFLAG,Ity_I32),
14128 mkU8(21)),
14129 mkU32(1<<21)))
14130 );
14131
14132 /* And patch in the AC flag. */
14133 t5 = newTemp(Ity_I32);
14134 assign( t5, binop(Iop_Or32,
14135 mkexpr(t4),
14136 binop(Iop_And32,
14137 binop(Iop_Shl32, IRExpr_Get(OFFB_ACFLAG,Ity_I32),
14138 mkU8(18)),
14139 mkU32(1<<18)))
14140 );
14141
14142 /* if sz==2, the stored value needs to be narrowed. */
14143 if (sz == 2)
14144 storeLE( mkexpr(t1), unop(Iop_32to16,mkexpr(t5)) );
14145 else
14146 storeLE( mkexpr(t1), mkexpr(t5) );
14147
14148 DIP("pushf%c\n", nameISize(sz));
14149 break;
14150 }
14151
14152 case 0x60: /* PUSHA */
14153 /* This is almost certainly wrong for sz==2. So ... */
14154 if (sz != 4) goto decode_failure;
14155
14156 /* This is the Right Way, in that the value to be pushed is
14157 established before %esp is changed, so that pusha
14158 correctly pushes the old %esp value. The new value of %esp is
14159 written to the guest state first, before any of the stores. */
14160 /* t0 is the %ESP value we're going to push. */
14161 t0 = newTemp(Ity_I32);
14162 assign( t0, getIReg(4, R_ESP) );
14163
14164 /* t5 will be the new %ESP value. */
14165 t5 = newTemp(Ity_I32);
14166 assign( t5, binop(Iop_Sub32, mkexpr(t0), mkU32(8*4)) );
14167
14168 /* Update guest state before prodding memory. */
14169 putIReg(4, R_ESP, mkexpr(t5));
14170
14171 /* Dump all the registers. */
14172 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(28)), getIReg(4,R_EAX) );
14173 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(24)), getIReg(4,R_ECX) );
14174 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(20)), getIReg(4,R_EDX) );
14175 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(16)), getIReg(4,R_EBX) );
14176 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(12)), mkexpr(t0) /*esp*/);
14177 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 8)), getIReg(4,R_EBP) );
14178 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 4)), getIReg(4,R_ESI) );
14179 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 0)), getIReg(4,R_EDI) );
14180
14181 DIP("pusha%c\n", nameISize(sz));
14182 break;
14183
14184 case 0x0E: /* PUSH %CS */
14185 dis_push_segreg( R_CS, sz ); break;
14186 case 0x1E: /* PUSH %DS */
14187 dis_push_segreg( R_DS, sz ); break;
14188 case 0x06: /* PUSH %ES */
14189 dis_push_segreg( R_ES, sz ); break;
14190 case 0x16: /* PUSH %SS */
14191 dis_push_segreg( R_SS, sz ); break;
14192
14193 /* ------------------------ SCAS et al ----------------- */
14194
14195 case 0xA4: /* MOVS, no REP prefix */
14196 case 0xA5:
14197 if (sorb != 0)
14198 goto decode_failure; /* else dis_string_op asserts */
14199 dis_string_op( dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb );
14200 break;
14201
14202 case 0xA6: /* CMPSb, no REP prefix */
14203 case 0xA7:
14204 if (sorb != 0)
14205 goto decode_failure; /* else dis_string_op asserts */
14206 dis_string_op( dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb );
14207 break;
14208
14209 case 0xAA: /* STOS, no REP prefix */
14210 case 0xAB:
14211 if (sorb != 0)
14212 goto decode_failure; /* else dis_string_op asserts */
14213 dis_string_op( dis_STOS, ( opc == 0xAA ? 1 : sz ), "stos", sorb );
14214 break;
14215
14216 case 0xAC: /* LODS, no REP prefix */
14217 case 0xAD:
14218 if (sorb != 0)
14219 goto decode_failure; /* else dis_string_op asserts */
14220 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", sorb );
14221 break;
14222
14223 case 0xAE: /* SCAS, no REP prefix */
14224 case 0xAF:
14225 if (sorb != 0)
14226 goto decode_failure; /* else dis_string_op asserts */
14227 dis_string_op( dis_SCAS, ( opc == 0xAE ? 1 : sz ), "scas", sorb );
14228 break;
14229
14230
14231 case 0xFC: /* CLD */
14232 stmt( IRStmt_Put( OFFB_DFLAG, mkU32(1)) );
14233 DIP("cld\n");
14234 break;
14235
14236 case 0xFD: /* STD */
14237 stmt( IRStmt_Put( OFFB_DFLAG, mkU32(0xFFFFFFFF)) );
14238 DIP("std\n");
14239 break;
14240
14241 case 0xF8: /* CLC */
14242 case 0xF9: /* STC */
14243 case 0xF5: /* CMC */
14244 t0 = newTemp(Ity_I32);
14245 t1 = newTemp(Ity_I32);
14246 assign( t0, mk_x86g_calculate_eflags_all() );
14247 switch (opc) {
14248 case 0xF8:
14249 assign( t1, binop(Iop_And32, mkexpr(t0),
14250 mkU32(~X86G_CC_MASK_C)));
14251 DIP("clc\n");
14252 break;
14253 case 0xF9:
14254 assign( t1, binop(Iop_Or32, mkexpr(t0),
14255 mkU32(X86G_CC_MASK_C)));
14256 DIP("stc\n");
14257 break;
14258 case 0xF5:
14259 assign( t1, binop(Iop_Xor32, mkexpr(t0),
14260 mkU32(X86G_CC_MASK_C)));
14261 DIP("cmc\n");
14262 break;
14263 default:
14264 vpanic("disInstr(x86)(clc/stc/cmc)");
14265 }
14266 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
14267 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
14268 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t1) ));
14269 /* Set NDEP even though it isn't used. This makes redundant-PUT
14270 elimination of previous stores to this field work better. */
14271 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
14272 break;
14273
14274 case 0xD6: /* SALC */
14275 t0 = newTemp(Ity_I32);
14276 t1 = newTemp(Ity_I32);
14277 assign( t0, binop(Iop_And32,
14278 mk_x86g_calculate_eflags_c(),
14279 mkU32(1)) );
14280 assign( t1, binop(Iop_Sar32,
14281 binop(Iop_Shl32, mkexpr(t0), mkU8(31)),
14282 mkU8(31)) );
14283 putIReg(1, R_EAX, unop(Iop_32to8, mkexpr(t1)) );
14284 DIP("salc\n");
14285 break;
14286
14287 /* REPNE prefix insn */
14288 case 0xF2: {
14289 Addr32 eip_orig = guest_EIP_bbstart + delta_start;
14290 if (sorb != 0) goto decode_failure;
14291 abyte = getIByte(delta); delta++;
14292
14293 if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }
14294
14295 switch (abyte) {
14296 /* According to the Intel manual, "repne movs" should never occur, but
14297 * in practice it has happened, so allow for it here... */
14298 case 0xA4: sz = 1; /* REPNE MOVS<sz> */
14299 case 0xA5:
14300 dis_REP_op ( &dres, X86CondNZ, dis_MOVS, sz, eip_orig,
14301 guest_EIP_bbstart+delta, "repne movs" );
14302 break;
14303
14304 case 0xA6: sz = 1; /* REPNE CMP<sz> */
14305 case 0xA7:
14306 dis_REP_op ( &dres, X86CondNZ, dis_CMPS, sz, eip_orig,
14307 guest_EIP_bbstart+delta, "repne cmps" );
14308 break;
14309
14310 case 0xAA: sz = 1; /* REPNE STOS<sz> */
14311 case 0xAB:
14312 dis_REP_op ( &dres, X86CondNZ, dis_STOS, sz, eip_orig,
14313 guest_EIP_bbstart+delta, "repne stos" );
14314 break;
14315
14316 case 0xAE: sz = 1; /* REPNE SCAS<sz> */
14317 case 0xAF:
14318 dis_REP_op ( &dres, X86CondNZ, dis_SCAS, sz, eip_orig,
14319 guest_EIP_bbstart+delta, "repne scas" );
14320 break;
14321
14322 default:
14323 goto decode_failure;
14324 }
14325 break;
14326 }
14327
14328 /* REP/REPE prefix insn (for SCAS and CMPS, 0xF3 means REPE,
14329 for the rest, it means REP) */
14330 case 0xF3: {
14331 Addr32 eip_orig = guest_EIP_bbstart + delta_start;
14332 abyte = getIByte(delta); delta++;
14333
14334 if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }
14335
14336 if (sorb != 0 && abyte != 0x0F) goto decode_failure;
14337
14338 switch (abyte) {
14339 case 0x0F:
14340 switch (getIByte(delta)) {
14341 /* On older CPUs, TZCNT behaves the same as BSF. */
14342 case 0xBC: /* REP BSF Gv,Ev */
14343 delta = dis_bs_E_G ( sorb, sz, delta + 1, True );
14344 break;
14345 /* On older CPUs, LZCNT behaves the same as BSR. */
14346 case 0xBD: /* REP BSR Gv,Ev */
14347 delta = dis_bs_E_G ( sorb, sz, delta + 1, False );
14348 break;
14349 default:
14350 goto decode_failure;
14351 }
14352 break;
14353
14354 case 0xA4: sz = 1; /* REP MOVS<sz> */
14355 case 0xA5:
14356 dis_REP_op ( &dres, X86CondAlways, dis_MOVS, sz, eip_orig,
14357 guest_EIP_bbstart+delta, "rep movs" );
14358 break;
14359
14360 case 0xA6: sz = 1; /* REPE CMP<sz> */
14361 case 0xA7:
14362 dis_REP_op ( &dres, X86CondZ, dis_CMPS, sz, eip_orig,
14363 guest_EIP_bbstart+delta, "repe cmps" );
14364 break;
14365
14366 case 0xAA: sz = 1; /* REP STOS<sz> */
14367 case 0xAB:
14368 dis_REP_op ( &dres, X86CondAlways, dis_STOS, sz, eip_orig,
14369 guest_EIP_bbstart+delta, "rep stos" );
14370 break;
14371
14372 case 0xAC: sz = 1; /* REP LODS<sz> */
14373 case 0xAD:
14374 dis_REP_op ( &dres, X86CondAlways, dis_LODS, sz, eip_orig,
14375 guest_EIP_bbstart+delta, "rep lods" );
14376 break;
14377
14378 case 0xAE: sz = 1; /* REPE SCAS<sz> */
14379 case 0xAF:
14380 dis_REP_op ( &dres, X86CondZ, dis_SCAS, sz, eip_orig,
14381 guest_EIP_bbstart+delta, "repe scas" );
14382 break;
14383
14384 case 0x90: /* REP NOP (PAUSE) */
14385 /* a hint to the P4 re spin-wait loop */
14386 DIP("rep nop (P4 pause)\n");
14387 /* "observe" the hint. The Vex client needs to be careful not
14388 to cause very long delays as a result, though. */
14389 jmp_lit(&dres, Ijk_Yield, ((Addr32)guest_EIP_bbstart)+delta);
14390 vassert(dres.whatNext == Dis_StopHere);
14391 break;
14392
14393 case 0xC3: /* REP RET -- same as normal ret? */
14394 dis_ret(&dres, 0);
14395 DIP("rep ret\n");
14396 break;
14397
14398 default:
14399 goto decode_failure;
14400 }
14401 break;
14402 }
14403
14404 /* ------------------------ XCHG ----------------------- */
14405
14406 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
14407 prefix; hence it must be translated with an IRCAS (at least, the
14408 memory variant). */
14409 case 0x86: /* XCHG Gb,Eb */
14410 sz = 1;
14411 /* Fall through ... */
14412 case 0x87: /* XCHG Gv,Ev */
14413 modrm = getIByte(delta);
14414 ty = szToITy(sz);
14415 t1 = newTemp(ty); t2 = newTemp(ty);
14416 if (epartIsReg(modrm)) {
14417 assign(t1, getIReg(sz, eregOfRM(modrm)));
14418 assign(t2, getIReg(sz, gregOfRM(modrm)));
14419 putIReg(sz, gregOfRM(modrm), mkexpr(t1));
14420 putIReg(sz, eregOfRM(modrm), mkexpr(t2));
14421 delta++;
14422 DIP("xchg%c %s, %s\n",
14423 nameISize(sz), nameIReg(sz,gregOfRM(modrm)),
14424 nameIReg(sz,eregOfRM(modrm)));
14425 } else {
14426 *expect_CAS = True;
14427 addr = disAMode ( &alen, sorb, delta, dis_buf );
14428 assign( t1, loadLE(ty,mkexpr(addr)) );
14429 assign( t2, getIReg(sz,gregOfRM(modrm)) );
14430 casLE( mkexpr(addr),
14431 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
14432 putIReg( sz, gregOfRM(modrm), mkexpr(t1) );
14433 delta += alen;
14434 DIP("xchg%c %s, %s\n", nameISize(sz),
14435 nameIReg(sz,gregOfRM(modrm)), dis_buf);
14436 }
14437 break;
14438
14439 case 0x90: /* XCHG eAX,eAX */
14440 DIP("nop\n");
14441 break;
14442 case 0x91: /* XCHG eAX,eCX */
14443 case 0x92: /* XCHG eAX,eDX */
14444 case 0x93: /* XCHG eAX,eBX */
14445 case 0x94: /* XCHG eAX,eSP */
14446 case 0x95: /* XCHG eAX,eBP */
14447 case 0x96: /* XCHG eAX,eSI */
14448 case 0x97: /* XCHG eAX,eDI */
14449 codegen_xchg_eAX_Reg ( sz, opc - 0x90 );
14450 break;
14451
14452 /* ------------------------ XLAT ----------------------- */
14453
14454 case 0xD7: /* XLAT */
14455 if (sz != 4) goto decode_failure; /* sz == 2 is also allowed (0x66) */
14456 putIReg(
14457 1,
14458 R_EAX/*AL*/,
14459 loadLE(Ity_I8,
14460 handleSegOverride(
14461 sorb,
14462 binop(Iop_Add32,
14463 getIReg(4, R_EBX),
14464 unop(Iop_8Uto32, getIReg(1, R_EAX/*AL*/))))));
14465
14466 DIP("xlat%c [ebx]\n", nameISize(sz));
14467 break;
14468
14469 /* ------------------------ IN / OUT ----------------------- */
14470
14471 case 0xE4: /* IN imm8, AL */
14472 sz = 1;
14473 t1 = newTemp(Ity_I32);
14474 abyte = getIByte(delta); delta++;
14475 assign(t1, mkU32( abyte & 0xFF ));
14476 DIP("in%c $%d,%s\n", nameISize(sz), abyte, nameIReg(sz,R_EAX));
14477 goto do_IN;
14478 case 0xE5: /* IN imm8, eAX */
14479 vassert(sz == 2 || sz == 4);
14480 t1 = newTemp(Ity_I32);
14481 abyte = getIByte(delta); delta++;
14482 assign(t1, mkU32( abyte & 0xFF ));
14483 DIP("in%c $%d,%s\n", nameISize(sz), abyte, nameIReg(sz,R_EAX));
14484 goto do_IN;
14485 case 0xEC: /* IN %DX, AL */
14486 sz = 1;
14487 t1 = newTemp(Ity_I32);
14488 assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX)));
14489 DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX),
14490 nameIReg(sz,R_EAX));
14491 goto do_IN;
14492 case 0xED: /* IN %DX, eAX */
14493 vassert(sz == 2 || sz == 4);
14494 t1 = newTemp(Ity_I32);
14495 assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX)));
14496 DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX),
14497 nameIReg(sz,R_EAX));
14498 goto do_IN;
14499 do_IN: {
14500 /* At this point, sz indicates the width, and t1 is a 32-bit
14501 value giving port number. */
14502 IRDirty* d;
14503 vassert(sz == 1 || sz == 2 || sz == 4);
14504 ty = szToITy(sz);
14505 t2 = newTemp(Ity_I32);
14506 d = unsafeIRDirty_1_N(
14507 t2,
14508 0/*regparms*/,
14509 "x86g_dirtyhelper_IN",
14510 &x86g_dirtyhelper_IN,
14511 mkIRExprVec_2( mkexpr(t1), mkU32(sz) )
14512 );
14513 /* do the call, dumping the result in t2. */
14514 stmt( IRStmt_Dirty(d) );
14515 putIReg(sz, R_EAX, narrowTo( ty, mkexpr(t2) ) );
14516 break;
14517 }
14518
14519 case 0xE6: /* OUT AL, imm8 */
14520 sz = 1;
14521 t1 = newTemp(Ity_I32);
14522 abyte = getIByte(delta); delta++;
14523 assign( t1, mkU32( abyte & 0xFF ) );
14524 DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), abyte);
14525 goto do_OUT;
14526 case 0xE7: /* OUT eAX, imm8 */
14527 vassert(sz == 2 || sz == 4);
14528 t1 = newTemp(Ity_I32);
14529 abyte = getIByte(delta); delta++;
14530 assign( t1, mkU32( abyte & 0xFF ) );
14531 DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), abyte);
14532 goto do_OUT;
14533 case 0xEE: /* OUT AL, %DX */
14534 sz = 1;
14535 t1 = newTemp(Ity_I32);
14536 assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) );
14537 DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX),
14538 nameIReg(2,R_EDX));
14539 goto do_OUT;
14540 case 0xEF: /* OUT eAX, %DX */
14541 vassert(sz == 2 || sz == 4);
14542 t1 = newTemp(Ity_I32);
14543 assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) );
14544 DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX),
14545 nameIReg(2,R_EDX));
14546 goto do_OUT;
14547 do_OUT: {
14548 /* At this point, sz indicates the width, and t1 is a 32-bit
14549 value giving port number. */
14550 IRDirty* d;
14551 vassert(sz == 1 || sz == 2 || sz == 4);
14552 ty = szToITy(sz);
14553 d = unsafeIRDirty_0_N(
14554 0/*regparms*/,
14555 "x86g_dirtyhelper_OUT",
14556 &x86g_dirtyhelper_OUT,
14557 mkIRExprVec_3( mkexpr(t1),
14558 widenUto32( getIReg(sz, R_EAX) ),
14559 mkU32(sz) )
14560 );
14561 stmt( IRStmt_Dirty(d) );
14562 break;
14563 }
14564
14565 /* ------------------------ (Grp1 extensions) ---------- */
14566
14567 case 0x82: /* Grp1 Ib,Eb too. Apparently this is the same as
14568 case 0x80, but only in 32-bit mode. */
14569 /* fallthru */
14570 case 0x80: /* Grp1 Ib,Eb */
14571 modrm = getIByte(delta);
14572 am_sz = lengthAMode(delta);
14573 sz = 1;
14574 d_sz = 1;
14575 d32 = getUChar(delta + am_sz);
14576 delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
14577 break;
14578
14579 case 0x81: /* Grp1 Iv,Ev */
14580 modrm = getIByte(delta);
14581 am_sz = lengthAMode(delta);
14582 d_sz = sz;
14583 d32 = getUDisp(d_sz, delta + am_sz);
14584 delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
14585 break;
14586
14587 case 0x83: /* Grp1 Ib,Ev */
14588 modrm = getIByte(delta);
14589 am_sz = lengthAMode(delta);
14590 d_sz = 1;
14591 d32 = getSDisp8(delta + am_sz);
14592 delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
14593 break;
14594
14595 /* ------------------------ (Grp2 extensions) ---------- */
14596
14597 case 0xC0: { /* Grp2 Ib,Eb */
14598 Bool decode_OK = True;
14599 modrm = getIByte(delta);
14600 am_sz = lengthAMode(delta);
14601 d_sz = 1;
14602 d32 = getUChar(delta + am_sz);
14603 sz = 1;
14604 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
14605 mkU8(d32 & 0xFF), NULL, &decode_OK );
14606 if (!decode_OK)
14607 goto decode_failure;
14608 break;
14609 }
14610 case 0xC1: { /* Grp2 Ib,Ev */
14611 Bool decode_OK = True;
14612 modrm = getIByte(delta);
14613 am_sz = lengthAMode(delta);
14614 d_sz = 1;
14615 d32 = getUChar(delta + am_sz);
14616 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
14617 mkU8(d32 & 0xFF), NULL, &decode_OK );
14618 if (!decode_OK)
14619 goto decode_failure;
14620 break;
14621 }
14622 case 0xD0: { /* Grp2 1,Eb */
14623 Bool decode_OK = True;
14624 modrm = getIByte(delta);
14625 am_sz = lengthAMode(delta);
14626 d_sz = 0;
14627 d32 = 1;
14628 sz = 1;
14629 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
14630 mkU8(d32), NULL, &decode_OK );
14631 if (!decode_OK)
14632 goto decode_failure;
14633 break;
14634 }
14635 case 0xD1: { /* Grp2 1,Ev */
14636 Bool decode_OK = True;
14637 modrm = getUChar(delta);
14638 am_sz = lengthAMode(delta);
14639 d_sz = 0;
14640 d32 = 1;
14641 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
14642 mkU8(d32), NULL, &decode_OK );
14643 if (!decode_OK)
14644 goto decode_failure;
14645 break;
14646 }
14647 case 0xD2: { /* Grp2 CL,Eb */
14648 Bool decode_OK = True;
14649 modrm = getUChar(delta);
14650 am_sz = lengthAMode(delta);
14651 d_sz = 0;
14652 sz = 1;
14653 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
14654 getIReg(1,R_ECX), "%cl", &decode_OK );
14655 if (!decode_OK)
14656 goto decode_failure;
14657 break;
14658 }
14659 case 0xD3: { /* Grp2 CL,Ev */
14660 Bool decode_OK = True;
14661 modrm = getIByte(delta);
14662 am_sz = lengthAMode(delta);
14663 d_sz = 0;
14664 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
14665 getIReg(1,R_ECX), "%cl", &decode_OK );
14666 if (!decode_OK)
14667 goto decode_failure;
14668 break;
14669 }
14670
14671 /* ------------------------ (Grp3 extensions) ---------- */
14672
14673 case 0xF6: { /* Grp3 Eb */
14674 Bool decode_OK = True;
14675 delta = dis_Grp3 ( sorb, pfx_lock, 1, delta, &decode_OK );
14676 if (!decode_OK)
14677 goto decode_failure;
14678 break;
14679 }
14680 case 0xF7: { /* Grp3 Ev */
14681 Bool decode_OK = True;
14682 delta = dis_Grp3 ( sorb, pfx_lock, sz, delta, &decode_OK );
14683 if (!decode_OK)
14684 goto decode_failure;
14685 break;
14686 }
14687
14688 /* ------------------------ (Grp4 extensions) ---------- */
14689
14690 case 0xFE: { /* Grp4 Eb */
14691 Bool decode_OK = True;
14692 delta = dis_Grp4 ( sorb, pfx_lock, delta, &decode_OK );
14693 if (!decode_OK)
14694 goto decode_failure;
14695 break;
14696 }
14697
14698 /* ------------------------ (Grp5 extensions) ---------- */
14699
14700 case 0xFF: { /* Grp5 Ev */
14701 Bool decode_OK = True;
14702 delta = dis_Grp5 ( sorb, pfx_lock, sz, delta, &dres, &decode_OK );
14703 if (!decode_OK)
14704 goto decode_failure;
14705 break;
14706 }
14707
14708 /* ------------------------ Escapes to 2-byte opcodes -- */
14709
14710 case 0x0F: {
14711 opc = getIByte(delta); delta++;
14712 switch (opc) {
14713
14714 /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */
14715
14716 case 0xBA: { /* Grp8 Ib,Ev */
14717 Bool decode_OK = False;
14718 modrm = getUChar(delta);
14719 am_sz = lengthAMode(delta);
14720 d32 = getSDisp8(delta + am_sz);
14721 delta = dis_Grp8_Imm ( sorb, pfx_lock, delta, modrm,
14722 am_sz, sz, d32, &decode_OK );
14723 if (!decode_OK)
14724 goto decode_failure;
14725 break;
14726 }
14727
14728 /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */
14729
14730 case 0xBC: /* BSF Gv,Ev */
14731 delta = dis_bs_E_G ( sorb, sz, delta, True );
14732 break;
14733 case 0xBD: /* BSR Gv,Ev */
14734 delta = dis_bs_E_G ( sorb, sz, delta, False );
14735 break;
14736
14737 /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */
14738
14739 case 0xC8: /* BSWAP %eax */
14740 case 0xC9:
14741 case 0xCA:
14742 case 0xCB:
14743 case 0xCC:
14744 case 0xCD:
14745 case 0xCE:
14746 case 0xCF: /* BSWAP %edi */
14747 /* AFAICS from the Intel docs, this only exists at size 4. */
14748 if (sz != 4) goto decode_failure;
14749
14750 t1 = newTemp(Ity_I32);
14751 assign( t1, getIReg(4, opc-0xC8) );
14752 t2 = math_BSWAP(t1, Ity_I32);
14753
14754 putIReg(4, opc-0xC8, mkexpr(t2));
14755 DIP("bswapl %s\n", nameIReg(4, opc-0xC8));
14756 break;
14757
14758 /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */
14759
14760 case 0xA3: /* BT Gv,Ev */
14761 delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpNone );
14762 break;
14763 case 0xB3: /* BTR Gv,Ev */
14764 delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpReset );
14765 break;
14766 case 0xAB: /* BTS Gv,Ev */
14767 delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpSet );
14768 break;
14769 case 0xBB: /* BTC Gv,Ev */
14770 delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpComp );
14771 break;
14772
14773 /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */
14774
14775 case 0x40:
14776 case 0x41:
14777 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
14778 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
14779 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
14780 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
14781 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
14782 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
14783 case 0x48: /* CMOVSb (cmov negative) */
14784 case 0x49: /* CMOVNSb (cmov not negative) */
14785 case 0x4A: /* CMOVP (cmov parity even) */
14786 case 0x4B: /* CMOVNP (cmov parity odd) */
14787 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
14788 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
14789 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
14790 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
14791 delta = dis_cmov_E_G(sorb, sz, (X86Condcode)(opc - 0x40), delta);
14792 break;
14793
14794 /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */
14795
14796 case 0xB0: /* CMPXCHG Gb,Eb */
14797 delta = dis_cmpxchg_G_E ( sorb, pfx_lock, 1, delta );
14798 break;
14799 case 0xB1: /* CMPXCHG Gv,Ev */
14800 delta = dis_cmpxchg_G_E ( sorb, pfx_lock, sz, delta );
14801 break;
14802
14803 case 0xC7: { /* CMPXCHG8B Gv (0F C7 /1) */
14804 IRTemp expdHi = newTemp(Ity_I32);
14805 IRTemp expdLo = newTemp(Ity_I32);
14806 IRTemp dataHi = newTemp(Ity_I32);
14807 IRTemp dataLo = newTemp(Ity_I32);
14808 IRTemp oldHi = newTemp(Ity_I32);
14809 IRTemp oldLo = newTemp(Ity_I32);
14810 IRTemp flags_old = newTemp(Ity_I32);
14811 IRTemp flags_new = newTemp(Ity_I32);
14812 IRTemp success = newTemp(Ity_I1);
14813
14814 /* Translate this using a DCAS, even if there is no LOCK
14815 prefix. Life is too short to bother with generating two
14816 different translations for the with/without-LOCK-prefix
14817 cases. */
14818 *expect_CAS = True;
14819
14820 /* Decode, and generate address. */
14821 if (sz != 4) goto decode_failure;
14822 modrm = getIByte(delta);
14823 if (epartIsReg(modrm)) goto decode_failure;
14824 if (gregOfRM(modrm) != 1) goto decode_failure;
14825 addr = disAMode ( &alen, sorb, delta, dis_buf );
14826 delta += alen;
14827
14828 /* Get the expected and new values. */
14829 assign( expdHi, getIReg(4,R_EDX) );
14830 assign( expdLo, getIReg(4,R_EAX) );
14831 assign( dataHi, getIReg(4,R_ECX) );
14832 assign( dataLo, getIReg(4,R_EBX) );
14833
14834 /* Do the DCAS */
14835 stmt( IRStmt_CAS(
14836 mkIRCAS( oldHi, oldLo,
14837 Iend_LE, mkexpr(addr),
14838 mkexpr(expdHi), mkexpr(expdLo),
14839 mkexpr(dataHi), mkexpr(dataLo)
14840 )));
14841
14842 /* success when oldHi:oldLo == expdHi:expdLo */
14843 assign( success,
14844 binop(Iop_CasCmpEQ32,
14845 binop(Iop_Or32,
14846 binop(Iop_Xor32, mkexpr(oldHi), mkexpr(expdHi)),
14847 binop(Iop_Xor32, mkexpr(oldLo), mkexpr(expdLo))
14848 ),
14849 mkU32(0)
14850 ));
14851
14852 /* If the DCAS is successful, that is to say oldHi:oldLo ==
14853 expdHi:expdLo, then put expdHi:expdLo back in EDX:EAX,
14854 which is where they came from originally. Both the actual
14855 contents of these two regs, and any shadow values, are
14856 unchanged. If the DCAS fails then we're putting into
14857 EDX:EAX the value seen in memory. */
14858 putIReg(4, R_EDX,
14859 IRExpr_ITE( mkexpr(success),
14860 mkexpr(expdHi), mkexpr(oldHi)
14861 ));
14862 putIReg(4, R_EAX,
14863 IRExpr_ITE( mkexpr(success),
14864 mkexpr(expdLo), mkexpr(oldLo)
14865 ));
14866
14867 /* Copy the success bit into the Z flag and leave the others
14868 unchanged */
14869 assign( flags_old, widenUto32(mk_x86g_calculate_eflags_all()));
14870 assign(
14871 flags_new,
14872 binop(Iop_Or32,
14873 binop(Iop_And32, mkexpr(flags_old),
14874 mkU32(~X86G_CC_MASK_Z)),
14875 binop(Iop_Shl32,
14876 binop(Iop_And32,
14877 unop(Iop_1Uto32, mkexpr(success)), mkU32(1)),
14878 mkU8(X86G_CC_SHIFT_Z)) ));
14879
14880 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
14881 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
14882 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
14883 /* Set NDEP even though it isn't used. This makes
14884 redundant-PUT elimination of previous stores to this field
14885 work better. */
14886 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
14887
14888 /* Sheesh. Aren't you glad it was me and not you that had to
14889 write and validate all this grunge? */
14890
14891 DIP("cmpxchg8b %s\n", dis_buf);
14892 break;
14893 }
14894
14895 /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */
14896
14897 case 0xA2: { /* CPUID */
14898 /* Uses dirty helper:
14899 void x86g_dirtyhelper_CPUID_{sse0,sse1,sse2,sse3,mmxext} ( VexGuestX86State* )
14900 declared to modify eax and ecx, and to write ebx and edx
14901 */
14902 IRDirty* d = NULL;
14903 void* fAddr = NULL;
14904 const HChar* fName = NULL;
14905 if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3) {
14906 fName = "x86g_dirtyhelper_CPUID_sse3";
14907 fAddr = &x86g_dirtyhelper_CPUID_sse3;
14908 }
14909 else
14910 if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2) {
14911 fName = "x86g_dirtyhelper_CPUID_sse2";
14912 fAddr = &x86g_dirtyhelper_CPUID_sse2;
14913 }
14914 else
14915 if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE1) {
14916 fName = "x86g_dirtyhelper_CPUID_sse1";
14917 fAddr = &x86g_dirtyhelper_CPUID_sse1;
14918 }
14919 else
14920 if (archinfo->hwcaps & VEX_HWCAPS_X86_MMXEXT) {
14921 fName = "x86g_dirtyhelper_CPUID_mmxext";
14922 fAddr = &x86g_dirtyhelper_CPUID_mmxext;
14923 }
14924 else
14925 if (archinfo->hwcaps == 0/*no SSE*/) {
14926 fName = "x86g_dirtyhelper_CPUID_sse0";
14927 fAddr = &x86g_dirtyhelper_CPUID_sse0;
14928 } else
14929 vpanic("disInstr(x86)(cpuid)");
14930
14931 vassert(fName); vassert(fAddr);
14932 d = unsafeIRDirty_0_N ( 0/*regparms*/,
14933 fName, fAddr, mkIRExprVec_1(IRExpr_GSPTR()) );
14934 /* declare guest state effects */
14935 d->nFxState = 4;
14936 vex_bzero(&d->fxState, sizeof(d->fxState));
14937 d->fxState[0].fx = Ifx_Modify;
14938 d->fxState[0].offset = OFFB_EAX;
14939 d->fxState[0].size = 4;
14940 d->fxState[1].fx = Ifx_Write;
14941 d->fxState[1].offset = OFFB_EBX;
14942 d->fxState[1].size = 4;
14943 d->fxState[2].fx = Ifx_Modify;
14944 d->fxState[2].offset = OFFB_ECX;
14945 d->fxState[2].size = 4;
14946 d->fxState[3].fx = Ifx_Write;
14947 d->fxState[3].offset = OFFB_EDX;
14948 d->fxState[3].size = 4;
14949 /* execute the dirty call, side-effecting guest state */
14950 stmt( IRStmt_Dirty(d) );
14951 /* CPUID is a serialising insn. So, just in case someone is
14952 using it as a memory fence ... */
14953 stmt( IRStmt_MBE(Imbe_Fence) );
14954 DIP("cpuid\n");
14955 break;
14956 }
14957
14958 //-- if (!VG_(cpu_has_feature)(VG_X86_FEAT_CPUID))
14959 //-- goto decode_failure;
14960 //--
14961 //-- t1 = newTemp(cb);
14962 //-- t2 = newTemp(cb);
14963 //-- t3 = newTemp(cb);
14964 //-- t4 = newTemp(cb);
14965 //-- uInstr0(cb, CALLM_S, 0);
14966 //--
14967 //-- uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t1);
14968 //-- uInstr1(cb, PUSH, 4, TempReg, t1);
14969 //--
14970 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2);
14971 //-- uLiteral(cb, 0);
14972 //-- uInstr1(cb, PUSH, 4, TempReg, t2);
14973 //--
14974 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t3);
14975 //-- uLiteral(cb, 0);
14976 //-- uInstr1(cb, PUSH, 4, TempReg, t3);
14977 //--
14978 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4);
14979 //-- uLiteral(cb, 0);
14980 //-- uInstr1(cb, PUSH, 4, TempReg, t4);
14981 //--
14982 //-- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_CPUID));
14983 //-- uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty);
14984 //--
14985 //-- uInstr1(cb, POP, 4, TempReg, t4);
14986 //-- uInstr2(cb, PUT, 4, TempReg, t4, ArchReg, R_EDX);
14987 //--
14988 //-- uInstr1(cb, POP, 4, TempReg, t3);
14989 //-- uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ECX);
14990 //--
14991 //-- uInstr1(cb, POP, 4, TempReg, t2);
14992 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBX);
14993 //--
14994 //-- uInstr1(cb, POP, 4, TempReg, t1);
14995 //-- uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_EAX);
14996 //--
14997 //-- uInstr0(cb, CALLM_E, 0);
14998 //-- DIP("cpuid\n");
14999 //-- break;
15000 //--
15001 /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */
15002
15003 case 0xB6: /* MOVZXb Eb,Gv */
15004 if (sz != 2 && sz != 4)
15005 goto decode_failure;
15006 delta = dis_movx_E_G ( sorb, delta, 1, sz, False );
15007 break;
15008
15009 case 0xB7: /* MOVZXw Ew,Gv */
15010 if (sz != 4)
15011 goto decode_failure;
15012 delta = dis_movx_E_G ( sorb, delta, 2, 4, False );
15013 break;
15014
15015 case 0xBE: /* MOVSXb Eb,Gv */
15016 if (sz != 2 && sz != 4)
15017 goto decode_failure;
15018 delta = dis_movx_E_G ( sorb, delta, 1, sz, True );
15019 break;
15020
15021 case 0xBF: /* MOVSXw Ew,Gv */
15022 if (sz != 4 && /* accept movsww, sigh, see #250799 */sz != 2)
15023 goto decode_failure;
15024 delta = dis_movx_E_G ( sorb, delta, 2, sz, True );
15025 break;
15026
15027 //-- /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */
15028 //--
15029 //-- case 0xC3: /* MOVNTI Gv,Ev */
15030 //-- vg_assert(sz == 4);
15031 //-- modrm = getUChar(eip);
15032 //-- vg_assert(!epartIsReg(modrm));
15033 //-- t1 = newTemp(cb);
15034 //-- uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1);
15035 //-- pair = disAMode ( cb, sorb, eip, dis_buf );
15036 //-- t2 = LOW24(pair);
15037 //-- eip += HI8(pair);
15038 //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
15039 //-- DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf);
15040 //-- break;
15041
15042 /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */
15043
15044 case 0xAF: /* IMUL Ev, Gv */
15045 delta = dis_mul_E_G ( sorb, sz, delta );
15046 break;
15047
15048 /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */
15049
15050 case 0x1F:
15051 modrm = getUChar(delta);
15052 if (epartIsReg(modrm)) goto decode_failure;
15053 addr = disAMode ( &alen, sorb, delta, dis_buf );
15054 delta += alen;
15055 DIP("nop%c %s\n", nameISize(sz), dis_buf);
15056 break;
15057
15058 /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
15059 case 0x80:
15060 case 0x81:
15061 case 0x82: /* JBb/JNAEb (jump below) */
15062 case 0x83: /* JNBb/JAEb (jump not below) */
15063 case 0x84: /* JZb/JEb (jump zero) */
15064 case 0x85: /* JNZb/JNEb (jump not zero) */
15065 case 0x86: /* JBEb/JNAb (jump below or equal) */
15066 case 0x87: /* JNBEb/JAb (jump not below or equal) */
15067 case 0x88: /* JSb (jump negative) */
15068 case 0x89: /* JNSb (jump not negative) */
15069 case 0x8A: /* JP (jump parity even) */
15070 case 0x8B: /* JNP/JPO (jump parity odd) */
15071 case 0x8C: /* JLb/JNGEb (jump less) */
15072 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
15073 case 0x8E: /* JLEb/JNGb (jump less or equal) */
15074 case 0x8F: /* JGb/JNLEb (jump greater) */
15075 { Int jmpDelta;
15076 const HChar* comment = "";
15077 jmpDelta = (Int)getUDisp32(delta);
15078 d32 = (((Addr32)guest_EIP_bbstart)+delta+4) + jmpDelta;
15079 delta += 4;
15080 if (resteerCisOk
15081 && vex_control.guest_chase_cond
15082 && (Addr32)d32 != (Addr32)guest_EIP_bbstart
15083 && jmpDelta < 0
15084 && resteerOkFn( callback_opaque, (Addr32)d32) ) {
15085 /* Speculation: assume this backward branch is taken. So
15086 we need to emit a side-exit to the insn following this
15087 one, on the negation of the condition, and continue at
15088 the branch target address (d32). If we wind up back at
15089 the first instruction of the trace, just stop; it's
15090 better to let the IR loop unroller handle that case.*/
15091 stmt( IRStmt_Exit(
15092 mk_x86g_calculate_condition((X86Condcode)
15093 (1 ^ (opc - 0x80))),
15094 Ijk_Boring,
15095 IRConst_U32(guest_EIP_bbstart+delta),
15096 OFFB_EIP ) );
15097 dres.whatNext = Dis_ResteerC;
15098 dres.continueAt = (Addr32)d32;
15099 comment = "(assumed taken)";
15100 }
15101 else
15102 if (resteerCisOk
15103 && vex_control.guest_chase_cond
15104 && (Addr32)d32 != (Addr32)guest_EIP_bbstart
15105 && jmpDelta >= 0
15106 && resteerOkFn( callback_opaque,
15107 (Addr32)(guest_EIP_bbstart+delta)) ) {
15108 /* Speculation: assume this forward branch is not taken.
15109 So we need to emit a side-exit to d32 (the dest) and
15110 continue disassembling at the insn immediately
15111 following this one. */
15112 stmt( IRStmt_Exit(
15113 mk_x86g_calculate_condition((X86Condcode)(opc - 0x80)),
15114 Ijk_Boring,
15115 IRConst_U32(d32),
15116 OFFB_EIP ) );
15117 dres.whatNext = Dis_ResteerC;
15118 dres.continueAt = guest_EIP_bbstart + delta;
15119 comment = "(assumed not taken)";
15120 }
15121 else {
15122 /* Conservative default translation - end the block at
15123 this point. */
15124 jcc_01( &dres, (X86Condcode)(opc - 0x80),
15125 (Addr32)(guest_EIP_bbstart+delta), d32);
15126 vassert(dres.whatNext == Dis_StopHere);
15127 }
15128 DIP("j%s-32 0x%x %s\n", name_X86Condcode(opc - 0x80), d32, comment);
15129 break;
15130 }
15131
15132 /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
15133 case 0x31: { /* RDTSC */
15134 IRTemp val = newTemp(Ity_I64);
15135 IRExpr** args = mkIRExprVec_0();
15136 IRDirty* d = unsafeIRDirty_1_N (
15137 val,
15138 0/*regparms*/,
15139 "x86g_dirtyhelper_RDTSC",
15140 &x86g_dirtyhelper_RDTSC,
15141 args
15142 );
15143 /* execute the dirty call, dumping the result in val. */
15144 stmt( IRStmt_Dirty(d) );
15145 putIReg(4, R_EDX, unop(Iop_64HIto32, mkexpr(val)));
15146 putIReg(4, R_EAX, unop(Iop_64to32, mkexpr(val)));
15147 DIP("rdtsc\n");
15148 break;
15149 }
15150
15151 /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */
15152
15153 case 0xA1: /* POP %FS */
15154 dis_pop_segreg( R_FS, sz ); break;
15155 case 0xA9: /* POP %GS */
15156 dis_pop_segreg( R_GS, sz ); break;
15157
15158 case 0xA0: /* PUSH %FS */
15159 dis_push_segreg( R_FS, sz ); break;
15160 case 0xA8: /* PUSH %GS */
15161 dis_push_segreg( R_GS, sz ); break;
15162
15163 /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
15164 case 0x90:
15165 case 0x91:
15166 case 0x92: /* set-Bb/set-NAEb (jump below) */
15167 case 0x93: /* set-NBb/set-AEb (jump not below) */
15168 case 0x94: /* set-Zb/set-Eb (jump zero) */
15169 case 0x95: /* set-NZb/set-NEb (jump not zero) */
15170 case 0x96: /* set-BEb/set-NAb (jump below or equal) */
15171 case 0x97: /* set-NBEb/set-Ab (jump not below or equal) */
15172 case 0x98: /* set-Sb (jump negative) */
15173 case 0x99: /* set-NSb (jump not negative) */
15174 case 0x9A: /* set-P (jump parity even) */
15175 case 0x9B: /* set-NP (jump parity odd) */
15176 case 0x9C: /* set-Lb/set-NGEb (jump less) */
15177 case 0x9D: /* set-GEb/set-NLb (jump greater or equal) */
15178 case 0x9E: /* set-LEb/set-NGb (jump less or equal) */
15179 case 0x9F: /* set-Gb/set-NLEb (jump greater) */
15180 t1 = newTemp(Ity_I8);
15181 assign( t1, unop(Iop_1Uto8,mk_x86g_calculate_condition(opc-0x90)) );
15182 modrm = getIByte(delta);
15183 if (epartIsReg(modrm)) {
15184 delta++;
15185 putIReg(1, eregOfRM(modrm), mkexpr(t1));
15186 DIP("set%s %s\n", name_X86Condcode(opc-0x90),
15187 nameIReg(1,eregOfRM(modrm)));
15188 } else {
15189 addr = disAMode ( &alen, sorb, delta, dis_buf );
15190 delta += alen;
15191 storeLE( mkexpr(addr), mkexpr(t1) );
15192 DIP("set%s %s\n", name_X86Condcode(opc-0x90), dis_buf);
15193 }
15194 break;
15195
15196 /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */
15197
15198 case 0xA4: /* SHLDv imm8,Gv,Ev */
15199 modrm = getIByte(delta);
15200 d32 = delta + lengthAMode(delta);
15201 vex_sprintf(dis_buf, "$%d", getIByte(d32));
15202 delta = dis_SHLRD_Gv_Ev (
15203 sorb, delta, modrm, sz,
15204 mkU8(getIByte(d32)), True, /* literal */
15205 dis_buf, True );
15206 break;
15207 case 0xA5: /* SHLDv %cl,Gv,Ev */
15208 modrm = getIByte(delta);
15209 delta = dis_SHLRD_Gv_Ev (
15210 sorb, delta, modrm, sz,
15211 getIReg(1,R_ECX), False, /* not literal */
15212 "%cl", True );
15213 break;
15214
15215 case 0xAC: /* SHRDv imm8,Gv,Ev */
15216 modrm = getIByte(delta);
15217 d32 = delta + lengthAMode(delta);
15218 vex_sprintf(dis_buf, "$%d", getIByte(d32));
15219 delta = dis_SHLRD_Gv_Ev (
15220 sorb, delta, modrm, sz,
15221 mkU8(getIByte(d32)), True, /* literal */
15222 dis_buf, False );
15223 break;
15224 case 0xAD: /* SHRDv %cl,Gv,Ev */
15225 modrm = getIByte(delta);
15226 delta = dis_SHLRD_Gv_Ev (
15227 sorb, delta, modrm, sz,
15228 getIReg(1,R_ECX), False, /* not literal */
15229 "%cl", False );
15230 break;
15231
15232 /* =-=-=-=-=-=-=-=-=- SYSENTER -=-=-=-=-=-=-=-=-=-= */
15233
15234 case 0x34:
15235 /* Simple implementation needing a long explanation.
15236
15237 sysenter is a kind of syscall entry. The key thing here
15238 is that the return address is not known -- that is
15239 something that is beyond Vex's knowledge. So this IR
15240 forces a return to the scheduler, which can do what it
15241 likes to simulate the sysenter, but it MUST set this
15242 thread's guest_EIP field with the continuation address
15243 before resuming execution. If that doesn't happen, the
15244 thread will jump to address zero, which is probably
15245 fatal.
15246 */
15247
15248 /* Note where we are, so we can back up the guest to this
15249 point if the syscall needs to be restarted. */
15250 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
15251 mkU32(guest_EIP_curr_instr) ) );
15252 jmp_lit(&dres, Ijk_Sys_sysenter, 0/*bogus next EIP value*/);
15253 vassert(dres.whatNext == Dis_StopHere);
15254 DIP("sysenter");
15255 break;
15256
15257 /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */
15258
15259 case 0xC0: { /* XADD Gb,Eb */
15260 Bool decodeOK;
15261 delta = dis_xadd_G_E ( sorb, pfx_lock, 1, delta, &decodeOK );
15262 if (!decodeOK) goto decode_failure;
15263 break;
15264 }
15265 case 0xC1: { /* XADD Gv,Ev */
15266 Bool decodeOK;
15267 delta = dis_xadd_G_E ( sorb, pfx_lock, sz, delta, &decodeOK );
15268 if (!decodeOK) goto decode_failure;
15269 break;
15270 }
15271
15272 /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */
15273
15274 case 0x71:
15275 case 0x72:
15276 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
15277
15278 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
15279 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
15280 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
15281 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
15282
15283 case 0xFC:
15284 case 0xFD:
15285 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
15286
15287 case 0xEC:
15288 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
15289
15290 case 0xDC:
15291 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
15292
15293 case 0xF8:
15294 case 0xF9:
15295 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
15296
15297 case 0xE8:
15298 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
15299
15300 case 0xD8:
15301 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
15302
15303 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
15304 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
15305
15306 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
15307
15308 case 0x74:
15309 case 0x75:
15310 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
15311
15312 case 0x64:
15313 case 0x65:
15314 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
15315
15316 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
15317 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
15318 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
15319
15320 case 0x68:
15321 case 0x69:
15322 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
15323
15324 case 0x60:
15325 case 0x61:
15326 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
15327
15328 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
15329 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
15330 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
15331 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
15332
15333 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
15334 case 0xF2:
15335 case 0xF3:
15336
15337 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
15338 case 0xD2:
15339 case 0xD3:
15340
15341 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
15342 case 0xE2:
15343 {
15344 Int delta0 = delta-1;
15345 Bool decode_OK = False;
15346
15347 /* If sz==2 this is SSE, and we assume sse idec has
15348 already spotted those cases by now. */
15349 if (sz != 4)
15350 goto decode_failure;
15351
15352 delta = dis_MMX ( &decode_OK, sorb, sz, delta-1 );
15353 if (!decode_OK) {
15354 delta = delta0;
15355 goto decode_failure;
15356 }
15357 break;
15358 }
15359
15360 case 0x0E: /* FEMMS */
15361 case 0x77: /* EMMS */
15362 if (sz != 4)
15363 goto decode_failure;
15364 do_EMMS_preamble();
15365 DIP("{f}emms\n");
15366 break;
15367
15368 /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */
15369 case 0x01: /* 0F 01 /0 -- SGDT */
15370 /* 0F 01 /1 -- SIDT */
15371 {
15372 /* This is really revolting, but ... since each processor
15373 (core) only has one IDT and one GDT, just let the guest
15374 see it (pass-through semantics). I can't see any way to
15375 construct a faked-up value, so don't bother to try. */
15376 modrm = getUChar(delta);
15377 if (epartIsReg(modrm)) goto decode_failure;
15378 if (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)
15379 goto decode_failure;
15380 addr = disAMode ( &alen, sorb, delta, dis_buf );
15381 delta += alen;
15382 switch (gregOfRM(modrm)) {
15383 case 0: DIP("sgdt %s\n", dis_buf); break;
15384 case 1: DIP("sidt %s\n", dis_buf); break;
15385 default: vassert(0); /*NOTREACHED*/
15386 }
15387
15388 IRDirty* d = unsafeIRDirty_0_N (
15389 0/*regparms*/,
15390 "x86g_dirtyhelper_SxDT",
15391 &x86g_dirtyhelper_SxDT,
15392 mkIRExprVec_2( mkexpr(addr),
15393 mkU32(gregOfRM(modrm)) )
15394 );
15395 /* declare we're writing memory */
15396 d->mFx = Ifx_Write;
15397 d->mAddr = mkexpr(addr);
15398 d->mSize = 6;
15399 stmt( IRStmt_Dirty(d) );
15400 break;
15401 }
15402
15403 case 0x05: /* AMD's syscall */
15404 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
15405 mkU32(guest_EIP_curr_instr) ) );
15406 jmp_lit(&dres, Ijk_Sys_syscall, ((Addr32)guest_EIP_bbstart)+delta);
15407 vassert(dres.whatNext == Dis_StopHere);
15408 DIP("syscall\n");
15409 break;
15410
15411 /* =-=-=-=-=-=-=-=-=-=- UD2 =-=-=-=-=-=-=-=-=-=-=-= */
15412
15413 case 0x0B: /* UD2 */
15414 stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) );
15415 jmp_lit(&dres, Ijk_NoDecode, guest_EIP_curr_instr);
15416 vassert(dres.whatNext == Dis_StopHere);
15417 DIP("ud2\n");
15418 break;
15419
15420 /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */
15421
15422 default:
15423 goto decode_failure;
15424 } /* switch (opc) for the 2-byte opcodes */
15425 goto decode_success;
15426 } /* case 0x0F: of primary opcode */
15427
15428 /* ------------------------ ??? ------------------------ */
15429
15430 default:
15431 decode_failure:
15432 /* All decode failures end up here. */
15433 if (sigill_diag) {
15434 vex_printf("vex x86->IR: unhandled instruction bytes: "
15435 "0x%x 0x%x 0x%x 0x%x\n",
15436 getIByte(delta_start+0),
15437 getIByte(delta_start+1),
15438 getIByte(delta_start+2),
15439 getIByte(delta_start+3));
15440 }
15441
15442 /* Tell the dispatcher that this insn cannot be decoded, and so has
15443 not been executed, and (is currently) the next to be executed.
15444 EIP should be up-to-date since it is made so at the start of each
15445 insn, but nevertheless be paranoid and update it again right
15446 now. */
15447 stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) );
15448 jmp_lit(&dres, Ijk_NoDecode, guest_EIP_curr_instr);
15449 vassert(dres.whatNext == Dis_StopHere);
15450 dres.len = 0;
15451 /* We also need to say that a CAS is not expected now, regardless
15452 of what it might have been set to at the start of the function,
15453 since the IR that we've emitted just above (to synthesise a
15454 SIGILL) does not involve any CAS, and presumably no other IR has
15455 been emitted for this (non-decoded) insn. */
15456 *expect_CAS = False;
15457 return dres;
15458
15459 } /* switch (opc) for the main (primary) opcode switch. */
15460
15461 decode_success:
15462 /* All decode successes end up here. */
15463 switch (dres.whatNext) {
15464 case Dis_Continue:
15465 stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_bbstart + delta) ) );
15466 break;
15467 case Dis_ResteerU:
15468 case Dis_ResteerC:
15469 stmt( IRStmt_Put( OFFB_EIP, mkU32(dres.continueAt) ) );
15470 break;
15471 case Dis_StopHere:
15472 break;
15473 default:
15474 vassert(0);
15475 }
15476
15477 DIP("\n");
15478 dres.len = delta - delta_start;
15479 return dres;
15480 }
15481
15482 #undef DIP
15483 #undef DIS
15484
15485
15486 /*------------------------------------------------------------*/
15487 /*--- Top-level fn ---*/
15488 /*------------------------------------------------------------*/
15489
15490 /* Disassemble a single instruction into IR. The instruction
15491 is located in host memory at &guest_code[delta]. */
15492
disInstr_X86(IRSB * irsb_IN,Bool (* resteerOkFn)(void *,Addr),Bool resteerCisOk,void * callback_opaque,const UChar * guest_code_IN,Long delta,Addr guest_IP,VexArch guest_arch,const VexArchInfo * archinfo,const VexAbiInfo * abiinfo,VexEndness host_endness_IN,Bool sigill_diag_IN)15493 DisResult disInstr_X86 ( IRSB* irsb_IN,
15494 Bool (*resteerOkFn) ( void*, Addr ),
15495 Bool resteerCisOk,
15496 void* callback_opaque,
15497 const UChar* guest_code_IN,
15498 Long delta,
15499 Addr guest_IP,
15500 VexArch guest_arch,
15501 const VexArchInfo* archinfo,
15502 const VexAbiInfo* abiinfo,
15503 VexEndness host_endness_IN,
15504 Bool sigill_diag_IN )
15505 {
15506 Int i, x1, x2;
15507 Bool expect_CAS, has_CAS;
15508 DisResult dres;
15509
15510 /* Set globals (see top of this file) */
15511 vassert(guest_arch == VexArchX86);
15512 guest_code = guest_code_IN;
15513 irsb = irsb_IN;
15514 host_endness = host_endness_IN;
15515 guest_EIP_curr_instr = (Addr32)guest_IP;
15516 guest_EIP_bbstart = (Addr32)toUInt(guest_IP - delta);
15517
15518 x1 = irsb_IN->stmts_used;
15519 expect_CAS = False;
15520 dres = disInstr_X86_WRK ( &expect_CAS, resteerOkFn,
15521 resteerCisOk,
15522 callback_opaque,
15523 delta, archinfo, abiinfo, sigill_diag_IN );
15524 x2 = irsb_IN->stmts_used;
15525 vassert(x2 >= x1);
15526
15527 /* See comment at the top of disInstr_X86_WRK for meaning of
15528 expect_CAS. Here, we (sanity-)check for the presence/absence of
15529 IRCAS as directed by the returned expect_CAS value. */
15530 has_CAS = False;
15531 for (i = x1; i < x2; i++) {
15532 if (irsb_IN->stmts[i]->tag == Ist_CAS)
15533 has_CAS = True;
15534 }
15535
15536 if (expect_CAS != has_CAS) {
15537 /* inconsistency detected. re-disassemble the instruction so as
15538 to generate a useful error message; then assert. */
15539 vex_traceflags |= VEX_TRACE_FE;
15540 dres = disInstr_X86_WRK ( &expect_CAS, resteerOkFn,
15541 resteerCisOk,
15542 callback_opaque,
15543 delta, archinfo, abiinfo, sigill_diag_IN );
15544 for (i = x1; i < x2; i++) {
15545 vex_printf("\t\t");
15546 ppIRStmt(irsb_IN->stmts[i]);
15547 vex_printf("\n");
15548 }
15549 /* Failure of this assertion is serious and denotes a bug in
15550 disInstr. */
15551 vpanic("disInstr_X86: inconsistency in LOCK prefix handling");
15552 }
15553
15554 return dres;
15555 }
15556
15557
15558 /*--------------------------------------------------------------------*/
15559 /*--- end guest_x86_toIR.c ---*/
15560 /*--------------------------------------------------------------------*/
15561