
/*---------------------------------------------------------------*/
/*--- begin host_x86_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_generic_simd128.h"
#include "host_x86_defs.h"

/* TODO 21 Apr 2005:

   -- (Really an assembler issue) don't emit CMov32 as a cmov
      insn, since that's expensive on P4 and conditional branch
      is cheaper if (as we expect) the condition is highly predictable

   -- preserve xmm registers across function calls (by declaring them
      as trashed by call insns)

   -- preserve x87 ST stack discipline across function calls.  Sigh.

   -- Check doHelperCall: if a call is conditional, we cannot safely
      compute any regparm args directly to registers.  Hence, the
      fast-regparm marshalling should be restricted to unconditional
      calls only.
*/

/*---------------------------------------------------------*/
/*--- x87 control word stuff ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to a FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged
   at exit.
*/

#define DEFAULT_FPUCW 0x027F

/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */

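/* For reference (standard x87/SSE control-register layouts): in
   0x027F, bits 5:0 = 0x3F mask all six FP exceptions, bits 9:8 = 10b
   select 53-bit precision, and bits 11:10 = 00b select
   round-to-nearest.  In 0x1F80, bits 12:7 mask the six SSE
   exceptions, again with round-to-nearest. */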

/*---------------------------------------------------------*/
/*--- misc helpers ---*/
/*---------------------------------------------------------*/

/* These are duplicated in guest-x86/toIR.c */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}

static Bool isZeroU8 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U8
          && e->Iex.Const.con->Ico.U8 == 0;
}

static Bool isZeroU32 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U32
          && e->Iex.Const.con->Ico.U32 == 0;
}

//static Bool isZeroU64 ( IRExpr* e )
//{
//   return e->tag == Iex_Const
//          && e->Iex.Const.con->tag == Ico_U64
//          && e->Iex.Const.con->Ico.U64 == 0ULL;
//}


/*---------------------------------------------------------*/
/*--- ISelEnv ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register(s) are associated with each IRTemp
     temporary.  This is computed before insn selection starts, and
     does not change.  We expect this mapping to map precisely the
     same set of IRTemps as the type mapping does.

       - vregmap   holds the primary register for the IRTemp.
       - vregmapHI is only used for 64-bit integer-typed
         IRTemps.  It holds the identity of a second
         32-bit virtual HReg, which holds the high half
         of the value.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - The host subarchitecture we are selecting insns for.
     This is set at the start and does not change.

   - A Bool for indicating whether we may generate chain-me
     instructions for control flow transfers, or whether we must use
     XAssisted.

   - The maximum guest address of any guest insn in this block.
     Actually, the address of the highest-addressed byte from any insn
     in this block.  Is set at the start and does not change.  This is
     used for detecting jumps which are definitely forward-edges from
     this block, and therefore can be made (chained) to the fast entry
     point of the destination, thereby avoiding the destination's
     event check.

   Note, this is all (well, mostly) host-independent.
*/

typedef
   struct {
      /* Constant -- are set at the start and do not change. */
      IRTypeEnv* type_env;

      HReg* vregmap;
      HReg* vregmapHI;
      Int n_vregmap;

      UInt hwcaps;

      Bool chainingAllowed;
      Addr32 max_ga;

      /* These are modified as we go along. */
      HInstrArray* code;
      Int vreg_ctr;
   }
   ISelEnv;


static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, X86Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppX86Instr(instr, False);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcInt32, 0/*enc*/, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegF ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}


/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.
*/
static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e );
static X86RMI* iselIntExpr_RMI ( ISelEnv* env, const IRExpr* e );

static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e );
static X86RI* iselIntExpr_RI ( ISelEnv* env, const IRExpr* e );

static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e );
static X86RM* iselIntExpr_RM ( ISelEnv* env, const IRExpr* e );

static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e );
static HReg iselIntExpr_R ( ISelEnv* env, const IRExpr* e );

static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e );
static X86AMode* iselIntExpr_AMode ( ISelEnv* env, const IRExpr* e );

static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
                                ISelEnv* env, const IRExpr* e );
static void iselInt64Expr ( HReg* rHi, HReg* rLo,
                            ISelEnv* env, const IRExpr* e );

static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e );
static X86CondCode iselCondCode ( ISelEnv* env, const IRExpr* e );

static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e );
static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e );

static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e );
static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e );

static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e );
static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e );

/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers ---*/
/*---------------------------------------------------------*/

/* Make an int reg-reg move. */

static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcInt32);
   vassert(hregClass(dst) == HRcInt32);
   return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst);
}


/* Make a vector reg-reg move. */

static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   return X86Instr_SseReRg(Xsse_MOV, src, dst);
}

/* Advance/retreat %esp by n. */

static void add_to_esp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%4) == 0);
   addInstr(env,
            X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP()));
}

static void sub_from_esp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%4) == 0);
   addInstr(env,
            X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP()));
}


/* Given an amode, return one which references 4 bytes further
   along. */

static X86AMode* advance4 ( X86AMode* am )
{
   X86AMode* am4 = dopyX86AMode(am);
   switch (am4->tag) {
      case Xam_IRRS:
         am4->Xam.IRRS.imm += 4; break;
      case Xam_IR:
         am4->Xam.IR.imm += 4; break;
      default:
         vpanic("advance4(x86,host)");
   }
   return am4;
}


/* Push an arg onto the host stack, in preparation for a call to a
   helper function of some kind.  Returns the number of 32-bit words
   pushed.  If we encounter an IRExpr_VECRET() then we expect that
   r_vecRetAddr will be a valid register, that holds the relevant
   address.
*/
static Int pushArg ( ISelEnv* env, IRExpr* arg, HReg r_vecRetAddr )
{
   if (UNLIKELY(arg->tag == Iex_VECRET)) {
      vassert(0); //ATC
      vassert(!hregIsInvalid(r_vecRetAddr));
      addInstr(env, X86Instr_Push(X86RMI_Reg(r_vecRetAddr)));
      return 1;
   }
   if (UNLIKELY(arg->tag == Iex_GSPTR)) {
      addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
      return 1;
   }
   /* Else it's a "normal" expression. */
   IRType arg_ty = typeOfIRExpr(env->type_env, arg);
   if (arg_ty == Ity_I32) {
      addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
      return 1;
   } else
   if (arg_ty == Ity_I64) {
      HReg rHi, rLo;
      iselInt64Expr(&rHi, &rLo, env, arg);
      addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
      addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
      return 2;
   }
   ppIRExpr(arg);
   vpanic("pushArg(x86): can't handle arg of this type");
}


/* Complete the call to a helper function, by calling the
   helper and clearing the args off the stack. */

static
void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc,
                              IRCallee* cee, Int n_arg_ws,
                              RetLoc rloc )
{
   /* Complication.  Need to decide which reg to use as the fn address
      pointer, in a way that doesn't trash regparm-passed
      parameters. */
   vassert(sizeof(void*) == 4);

   addInstr(env, X86Instr_Call( cc, (Addr)cee->addr,
                                cee->regparms, rloc));
   if (n_arg_ws > 0)
      add_to_esp(env, 4*n_arg_ws);
}


/* Used only in doHelperCall.  See big comment in doHelperCall re
   handling of regparm args.  This function figures out whether
   evaluation of an expression might require use of a fixed register.
   If in doubt return True (safe but suboptimal).
*/
static
Bool mightRequireFixedRegs ( IRExpr* e )
{
   if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
      // These are always "safe" -- either a copy of %esp in some
      // arbitrary vreg, or a copy of %ebp, respectively.
      return False;
   }
   /* Else it's a "normal" expression. */
   switch (e->tag) {
      case Iex_RdTmp: case Iex_Const: case Iex_Get:
         return False;
      default:
         return True;
   }
}


/* Do a complete function call.  |guard| is an Ity_I1 expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done. */

static
void doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   X86CondCode cc;
   HReg argregs[3];
   HReg tmpregs[3];
   Bool danger;
   Int not_done_yet, n_args, n_arg_ws, stack_limit,
       i, argreg, argregX;

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of Iex_VECRET and Iex_GSPTR are observed. */
   UInt nVECRETs = 0;
   UInt nGSPTRs = 0;

   /* Marshal args for a call, do the call, and clear the stack.
      Complexities to consider:

      * The return type can be I{64,32,16,8} or V128.  In the V128
        case, it is expected that |args| will contain the special
        node IRExpr_VECRET(), in which case this routine generates
        code to allocate space on the stack for the vector return
        value.  Since we are not passing any scalars on the stack, it
        is enough to preallocate the return space before marshalling
        any arguments, in this case.

        |args| may also contain IRExpr_GSPTR(), in which case the
        value in %ebp is passed as the corresponding argument.

      * If the callee claims regparmness of 1, 2 or 3, we must pass the
        first 1, 2 or 3 args in registers (EAX, EDX, and ECX
        respectively).  To keep things relatively simple, only args of
        type I32 may be passed as regparms -- just bomb out if anything
        else turns up.  Clearly this depends on the front ends not
        trying to pass any other types as regparms.
   */

   /* 16 Nov 2004: the regparm handling is complicated by the
      following problem.

      Consider a call to a function with two regparm parameters:
      f(e1,e2).  We need to compute e1 into %eax and e2 into %edx.
      Suppose code is first generated to compute e1 into %eax.  Then,
      code is generated to compute e2 into %edx.  Unfortunately, if
      the latter code sequence uses %eax, it will trash the value of
      e1 computed by the former sequence.  This could happen if (for
      example) e2 itself involved a function call.  In the code below,
      args are evaluated right-to-left, not left-to-right, but the
      principle and the problem are the same.

      One solution is to compute all regparm-bound args into vregs
      first, and once they are all done, move them to the relevant
      real regs.  This always gives correct code, but it also gives
      a bunch of vreg-to-rreg moves which are usually redundant but
      are hard for the register allocator to get rid of.

      A compromise is to first examine all regparm'd argument
      expressions.  If they are all so simple that it is clear
      they will be evaluated without use of any fixed registers,
      use the old compute-directly-to-fixed-target scheme.  If not,
      be safe and use the via-vregs scheme.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this
      insn selector works.  Currently just the following 3 are
      regarded as safe -- hopefully they cover the majority of
      arguments in practice: IRExpr_RdTmp, IRExpr_Const and
      IRExpr_Get.
   */
   vassert(cee->regparms >= 0 && cee->regparms <= 3);

   /* Count the number of args and also the VECRETs */
   n_args = n_arg_ws = 0;
   while (args[n_args]) {
      IRExpr* arg = args[n_args];
      n_args++;
      if (UNLIKELY(arg->tag == Iex_VECRET)) {
         nVECRETs++;
      } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
         nGSPTRs++;
      }
   }

   /* If this fails, the IR is ill-formed */
   vassert(nGSPTRs == 0 || nGSPTRs == 1);

   /* If we have a VECRET, allocate space on the stack for the return
      value, and record the stack pointer after that. */
   HReg r_vecRetAddr = INVALID_HREG;
   if (nVECRETs == 1) {
      vassert(retTy == Ity_V128 || retTy == Ity_V256);
      vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
      r_vecRetAddr = newVRegI(env);
      sub_from_esp(env, 16);
      addInstr(env, mk_iMOVsd_RR( hregX86_ESP(), r_vecRetAddr ));
   } else {
      // If either of these fail, the IR is ill-formed
      vassert(retTy != Ity_V128 && retTy != Ity_V256);
      vassert(nVECRETs == 0);
   }

   not_done_yet = n_args;

   stack_limit = cee->regparms;

   /* ------ BEGIN marshall all arguments ------ */

   /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
   for (i = n_args-1; i >= stack_limit; i--) {
      n_arg_ws += pushArg(env, args[i], r_vecRetAddr);
      not_done_yet--;
   }

   /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
      registers. */

   if (cee->regparms > 0) {

      /* ------ BEGIN deal with regparms ------ */

      /* deal with regparms, not forgetting %ebp if needed. */
      argregs[0] = hregX86_EAX();
      argregs[1] = hregX86_EDX();
      argregs[2] = hregX86_ECX();
      tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;

      argreg = cee->regparms;

      /* In keeping with big comment above, detect potential danger
         and use the via-vregs scheme if needed. */
      danger = False;
      for (i = stack_limit-1; i >= 0; i--) {
         if (mightRequireFixedRegs(args[i])) {
            danger = True;
            break;
         }
      }

      if (danger) {

         /* Move via temporaries */
         argregX = argreg;
         for (i = stack_limit-1; i >= 0; i--) {

            if (0) {
               vex_printf("x86 host: register param is complex: ");
               ppIRExpr(args[i]);
               vex_printf("\n");
            }

            IRExpr* arg = args[i];
            argreg--;
            vassert(argreg >= 0);
            if (UNLIKELY(arg->tag == Iex_VECRET)) {
               vassert(0); //ATC
            }
            else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
               vassert(0); //ATC
            } else {
               vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
               tmpregs[argreg] = iselIntExpr_R(env, arg);
            }
            not_done_yet--;
         }
         for (i = stack_limit-1; i >= 0; i--) {
            argregX--;
            vassert(argregX >= 0);
            addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
         }

      } else {
         /* It's safe to compute all regparm args directly into their
            target registers. */
         for (i = stack_limit-1; i >= 0; i--) {
            IRExpr* arg = args[i];
            argreg--;
            vassert(argreg >= 0);
            if (UNLIKELY(arg->tag == Iex_VECRET)) {
               vassert(!hregIsInvalid(r_vecRetAddr));
               addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                             X86RMI_Reg(r_vecRetAddr),
                                             argregs[argreg]));
            }
            else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
               vassert(0); //ATC
            } else {
               vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
               addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                             iselIntExpr_RMI(env, arg),
                                             argregs[argreg]));
            }
            not_done_yet--;
         }

      }

      /* ------ END deal with regparms ------ */

   }

   vassert(not_done_yet == 0);

   /* ------ END marshall all arguments ------ */

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   cc = Xcc_ALWAYS;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         cc = iselCondCode( env, guard );
      }
   }

   /* Do final checks, set the return values, and generate the call
      instruction proper. */
   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64:
         *retloc = mk_RetLoc_simple(RLPri_2Int);
         break;
      case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         vassert(0); // ATC
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */
   callHelperAndClearArgs( env, cc, cee, n_arg_ws, *retloc );
}


/* Given a guest-state array descriptor, an index expression and a
   bias, generate an X86AMode holding the relevant guest state
   offset. */

static
X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int elemSz = sizeofIRType(descr->elemTy);
   Int nElems = descr->nElems;
   Int shift = 0;

   /* throw out any cases not generated by an x86 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-x86-guest on x86 host. */

   if (nElems != 8)
      vpanic("genGuestArrayOffset(x86 host)(1)");

   switch (elemSz) {
      case 1: shift = 0; break;
      case 4: shift = 2; break;
      case 8: shift = 3; break;
      default: vpanic("genGuestArrayOffset(x86 host)(2)");
   }

   /* Compute off into a reg, %off.  Then return:

        movl %off, %tmp
        addl $bias, %tmp  (if bias != 0)
        andl $7, %tmp
        ... base(%ebp, %tmp, shift) ...
   */
   tmp = newVRegI(env);
   roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));
   if (bias != 0) {
      addInstr(env,
               X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp));
   }
   addInstr(env,
            X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp));
   return
      X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift );
}
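
/* Note: in practice the x86 front end only seems to generate such
   arrays with 8 elements (e.g. the x87 register file and its tag
   words in the guest state), which is why nElems is pinned to 8 and
   the index can simply be masked with 7. */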


/* Mess with the FPU's rounding mode: set to the default rounding mode
   (DEFAULT_FPUCW). */
static
void set_FPU_rounding_default ( ISelEnv* env )
{
   /* pushl $DEFAULT_FPUCW
      fldcw 0(%esp)
      addl $4, %esp
   */
   X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
   addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
   addInstr(env, X86Instr_FpLdCW(zero_esp));
   add_to_esp(env, 4);
}


/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
   the same rounding.
*/
static
void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   HReg rrm = iselIntExpr_R(env, mode);
   HReg rrm2 = newVRegI(env);
   X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());

   /* movl  %rrm, %rrm2
      andl  $3, %rrm2   -- shouldn't be needed; paranoia
      shll  $10, %rrm2
      orl   $DEFAULT_FPUCW, %rrm2
      pushl %rrm2
      fldcw 0(%esp)
      addl  $4, %esp
   */
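   /* This relies on the IRRoundingMode encoding (0 = nearest,
      1 = -infinity, 2 = +infinity, 3 = toward zero) coinciding with
      the x87 RC field encoding in control-word bits 11:10, so the
      value can simply be shifted into place over the default CW. */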
   addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
   addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
   addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2));
   addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
   addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
   addInstr(env, X86Instr_FpLdCW(zero_esp));
   add_to_esp(env, 4);
}


/* Generate !src into a new vector register, and be sure that the code
   is SSE1 compatible.  Amazing that Intel doesn't offer a less crappy
   way to do this.
*/
static HReg do_sse_Not128 ( ISelEnv* env, HReg src )
{
   HReg dst = newVRegV(env);
   /* Set dst to zero.  If dst contains a NaN then all hell might
      break loose after the comparison.  So, first zero it. */
   addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst));
   /* And now make it all 1s ... */
   addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst));
   /* Finally, xor 'src' into it. */
   addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst));
   /* Doesn't that just totally suck? */
   return dst;
}


/* Round an x87 FPU value to 53-bit-mantissa precision, to be used
   after most non-simple FPU operations (simple = +, -, *, / and
   sqrt).

   This could be done a lot more efficiently if needed, by loading
   zero and adding it to the value to be rounded (fldz ; faddp?).
*/
static void roundToF64 ( ISelEnv* env, HReg reg )
{
   X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
   sub_from_esp(env, 8);
   addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
   addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
   add_to_esp(env, 8);
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (32/16/8 bit) ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 32, 16 and 8-bit type.  All
   results are returned in a 32-bit register.  For 16- and 8-bit
   expressions, the upper 16/24 bits are arbitrary, so you should mask
   or sign extend partial values if necessary.
*/

static HReg iselIntExpr_R ( ISelEnv* env, const IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt32);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
{
   MatchInfo mi;

   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   switch (e->tag) {

      /* --------- TEMP --------- */
      case Iex_RdTmp: {
         return lookupIRTemp(env, e->Iex.RdTmp.tmp);
      }

      /* --------- LOAD --------- */
      case Iex_Load: {
         HReg dst = newVRegI(env);
         X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );

         /* We can't handle big-endian loads, nor load-linked. */
         if (e->Iex.Load.end != Iend_LE)
            goto irreducible;

         if (ty == Ity_I32) {
            addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                          X86RMI_Mem(amode), dst) );
            return dst;
         }
         if (ty == Ity_I16) {
            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
            return dst;
         }
         if (ty == Ity_I8) {
            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
            return dst;
         }
         break;
      }

      /* --------- TERNARY OP --------- */
      case Iex_Triop: {
         IRTriop *triop = e->Iex.Triop.details;
         /* C3210 flags following FPU partial remainder (fprem), both
            IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
         if (triop->op == Iop_PRemC3210F64
             || triop->op == Iop_PRem1C3210F64) {
            HReg junk = newVRegF(env);
            HReg dst = newVRegI(env);
            HReg srcL = iselDblExpr(env, triop->arg2);
            HReg srcR = iselDblExpr(env, triop->arg3);
            /* XXXROUNDINGFIXME */
            /* set roundingmode here */
            addInstr(env, X86Instr_FpBinary(
                             triop->op==Iop_PRemC3210F64
                                ? Xfp_PREM : Xfp_PREM1,
                             srcL,srcR,junk
                  ));
            /* The previous pseudo-insn will have left the FPU's C3210
               flags set correctly.  So bag them. */
            addInstr(env, X86Instr_FpStSW_AX());
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
            addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
            return dst;
         }

         break;
      }

      /* --------- BINARY OP --------- */
      case Iex_Binop: {
         X86AluOp aluOp;
         X86ShiftOp shOp;

         /* Pattern: Sub32(0,x) */
         if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
            HReg dst = newVRegI(env);
            HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(reg,dst));
            addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
            return dst;
         }

         /* Is it an addition or logical style op? */
         switch (e->Iex.Binop.op) {
            case Iop_Add8: case Iop_Add16: case Iop_Add32:
               aluOp = Xalu_ADD; break;
            case Iop_Sub8: case Iop_Sub16: case Iop_Sub32:
               aluOp = Xalu_SUB; break;
            case Iop_And8: case Iop_And16: case Iop_And32:
               aluOp = Xalu_AND; break;
            case Iop_Or8: case Iop_Or16: case Iop_Or32:
               aluOp = Xalu_OR; break;
            case Iop_Xor8: case Iop_Xor16: case Iop_Xor32:
               aluOp = Xalu_XOR; break;
            case Iop_Mul16: case Iop_Mul32:
               aluOp = Xalu_MUL; break;
            default:
               aluOp = Xalu_INVALID; break;
         }
         /* For commutative ops we assume any literal
            values are on the second operand. */
         if (aluOp != Xalu_INVALID) {
            HReg dst = newVRegI(env);
            HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
            X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(reg,dst));
            addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst));
            return dst;
         }
         /* Could do better here; forcing the first arg into a reg
            isn't always clever.
            -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)),
                           LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32(
                           t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32)))
               movl 0xFFFFFFA0(%vr41),%vr107
               movl 0xFFFFFFA4(%vr41),%vr108
               movl %vr107,%vr106
               xorl %vr108,%vr106
               movl 0xFFFFFFA8(%vr41),%vr109
               movl %vr106,%vr105
               andl %vr109,%vr105
               movl 0xFFFFFFA0(%vr41),%vr110
               movl %vr105,%vr104
               xorl %vr110,%vr104
               movl %vr104,%vr70
         */

         /* Perhaps a shift op? */
         switch (e->Iex.Binop.op) {
            case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
               shOp = Xsh_SHL; break;
            case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
               shOp = Xsh_SHR; break;
            case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
               shOp = Xsh_SAR; break;
            default:
               shOp = Xsh_INVALID; break;
         }
         if (shOp != Xsh_INVALID) {
            HReg dst = newVRegI(env);

            /* regL = the value to be shifted */
            HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(regL,dst));

            /* Do any necessary widening for 16/8 bit operands */
            switch (e->Iex.Binop.op) {
               case Iop_Shr8:
                  addInstr(env, X86Instr_Alu32R(
                                   Xalu_AND, X86RMI_Imm(0xFF), dst));
                  break;
               case Iop_Shr16:
                  addInstr(env, X86Instr_Alu32R(
                                   Xalu_AND, X86RMI_Imm(0xFFFF), dst));
                  break;
               case Iop_Sar8:
                  addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst));
                  addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst));
                  break;
               case Iop_Sar16:
                  addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst));
                  addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst));
                  break;
               default: break;
            }

            /* Now consider the shift amount.  If it's a literal, we
               can do a much better job than the general case. */
            if (e->Iex.Binop.arg2->tag == Iex_Const) {
               /* assert that the IR is well-typed */
               Int nshift;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
               vassert(nshift >= 0);
               if (nshift > 0)
                  /* Can't allow nshift==0 since that means %cl */
                  addInstr(env, X86Instr_Sh32( shOp, nshift, dst ));
            } else {
               /* General case; we have to force the amount into %cl. */
               HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
               addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX()));
               addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst));
            }
            return dst;
         }

         /* Handle misc other ops. */

         if (e->Iex.Binop.op == Iop_Max32U) {
            HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg dst = newVRegI(env);
            HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(src1,dst));
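            /* cmpl %src2,%dst then cmovb: if dst is below src2
               (unsigned), replace it with src2, yielding the
               unsigned maximum. */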
            addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
            addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
            return dst;
         }

         if (e->Iex.Binop.op == Iop_8HLto16) {
            HReg hi8 = newVRegI(env);
            HReg lo8 = newVRegI(env);
            HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
            addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
            addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8));
            addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
            addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
            return hi8;
         }

         if (e->Iex.Binop.op == Iop_16HLto32) {
            HReg hi16 = newVRegI(env);
            HReg lo16 = newVRegI(env);
            HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
            addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
            addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16));
            addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
            addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
            return hi16;
         }

         if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8
             || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) {
            HReg a16 = newVRegI(env);
            HReg b16 = newVRegI(env);
            HReg a16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg b16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
            Int shift = (e->Iex.Binop.op == Iop_MullS8
                         || e->Iex.Binop.op == Iop_MullU8)
                           ? 24 : 16;
            X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8
                                 || e->Iex.Binop.op == Iop_MullS16)
                                   ? Xsh_SAR : Xsh_SHR;

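            /* The 8/16-bit operands arrive with arbitrary junk in
               their upper bits.  Shifting left and then right again
               (arithmetically for the signed cases, logically for
               the unsigned ones) re-extends them, after which a
               single 32-bit multiply produces the full widened
               product. */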
            addInstr(env, mk_iMOVsd_RR(a16s, a16));
            addInstr(env, mk_iMOVsd_RR(b16s, b16));
            addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16));
            addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16));
            addInstr(env, X86Instr_Sh32(shr_op, shift, a16));
            addInstr(env, X86Instr_Sh32(shr_op, shift, b16));
            addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16));
            return b16;
         }

         if (e->Iex.Binop.op == Iop_CmpF64) {
            HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
            HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegI(env);
            addInstr(env, X86Instr_FpCmp(fL,fR,dst));
            /* shift this right 8 bits so as to conform to CmpF64
               definition. */
            addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst));
            return dst;
         }

         if (e->Iex.Binop.op == Iop_F64toI32S
             || e->Iex.Binop.op == Iop_F64toI16S) {
            Int sz = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4;
            HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegI(env);

            /* Used several times ... */
            X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());

            /* rf now holds the value to be converted, and rrm holds the
               rounding mode value, encoded as per the IRRoundingMode
               enum.  The first thing to do is set the FPU's rounding
               mode accordingly. */

            /* Create a space for the format conversion. */
            /* subl $4, %esp */
            sub_from_esp(env, 4);

            /* Set host rounding mode */
            set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

            /* gistw/l %rf, 0(%esp) */
            addInstr(env, X86Instr_FpLdStI(False/*store*/,
                                           toUChar(sz), rf, zero_esp));

            if (sz == 2) {
               /* movzwl 0(%esp), %dst */
               addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
            } else {
               /* movl 0(%esp), %dst */
               vassert(sz == 4);
               addInstr(env, X86Instr_Alu32R(
                                Xalu_MOV, X86RMI_Mem(zero_esp), dst));
            }

            /* Restore default FPU rounding. */
            set_FPU_rounding_default( env );

            /* addl $4, %esp */
            add_to_esp(env, 4);
            return dst;
         }

         break;
      }

      /* --------- UNARY OP --------- */
      case Iex_Unop: {

         /* 1Uto8(32to1(expr32)) */
         if (e->Iex.Unop.op == Iop_1Uto8) {
            DECLARE_PATTERN(p_32to1_then_1Uto8);
            DEFINE_PATTERN(p_32to1_then_1Uto8,
                           unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
            if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
               const IRExpr* expr32 = mi.bindee[0];
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, expr32);
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, X86Instr_Alu32R(Xalu_AND,
                                             X86RMI_Imm(1), dst));
               return dst;
            }
         }

         /* 8Uto32(LDle(expr32)) */
         if (e->Iex.Unop.op == Iop_8Uto32) {
            DECLARE_PATTERN(p_LDle8_then_8Uto32);
            DEFINE_PATTERN(p_LDle8_then_8Uto32,
                           unop(Iop_8Uto32,
                                IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
            if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
               HReg dst = newVRegI(env);
               X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
               addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
               return dst;
            }
         }

         /* 8Sto32(LDle(expr32)) */
         if (e->Iex.Unop.op == Iop_8Sto32) {
            DECLARE_PATTERN(p_LDle8_then_8Sto32);
            DEFINE_PATTERN(p_LDle8_then_8Sto32,
                           unop(Iop_8Sto32,
                                IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
            if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
               HReg dst = newVRegI(env);
               X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
               addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
               return dst;
            }
         }

         /* 16Uto32(LDle(expr32)) */
         if (e->Iex.Unop.op == Iop_16Uto32) {
            DECLARE_PATTERN(p_LDle16_then_16Uto32);
            DEFINE_PATTERN(p_LDle16_then_16Uto32,
                           unop(Iop_16Uto32,
                                IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
            if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
               HReg dst = newVRegI(env);
               X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
               addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
               return dst;
            }
         }

         /* 8Uto32(GET:I8) */
         if (e->Iex.Unop.op == Iop_8Uto32) {
            if (e->Iex.Unop.arg->tag == Iex_Get) {
               HReg dst;
               X86AMode* amode;
               vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
               dst = newVRegI(env);
               amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
                                   hregX86_EBP());
               addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
               return dst;
            }
         }

         /* 16Uto32(GET:I16) */
         if (e->Iex.Unop.op == Iop_16Uto32) {
            if (e->Iex.Unop.arg->tag == Iex_Get) {
               HReg dst;
               X86AMode* amode;
               vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
               dst = newVRegI(env);
               amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
                                   hregX86_EBP());
               addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
               return dst;
            }
         }

         switch (e->Iex.Unop.op) {
            case Iop_8Uto16:
            case Iop_8Uto32:
            case Iop_16Uto32: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, X86Instr_Alu32R(Xalu_AND,
                                             X86RMI_Imm(mask), dst));
               return dst;
            }
            case Iop_8Sto16:
            case Iop_8Sto32:
            case Iop_16Sto32: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24;
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst));
               addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst));
               return dst;
            }
            case Iop_Not8:
            case Iop_Not16:
            case Iop_Not32: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, X86Instr_Unary32(Xun_NOT,dst));
               return dst;
            }
            case Iop_64HIto32: {
               HReg rHi, rLo;
               iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
               return rHi; /* and abandon rLo .. poor wee thing :-) */
            }
            case Iop_64to32: {
               HReg rHi, rLo;
               iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
               return rLo; /* similar stupid comment to the above ... */
            }
            case Iop_16HIto8:
            case Iop_32HIto16: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
               return dst;
            }
            case Iop_1Uto32:
            case Iop_1Uto8: {
               HReg dst = newVRegI(env);
               X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
               addInstr(env, X86Instr_Set32(cond,dst));
               return dst;
            }
            case Iop_1Sto8:
            case Iop_1Sto16:
            case Iop_1Sto32: {
               /* could do better than this, but for now ... */
               HReg dst = newVRegI(env);
               X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
               addInstr(env, X86Instr_Set32(cond,dst));
               addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
               addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
               return dst;
            }
            case Iop_Ctz32: {
               /* Count trailing zeroes, implemented by x86 'bsfl' */
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, X86Instr_Bsfr32(True,src,dst));
               return dst;
            }
            case Iop_Clz32: {
               /* Count leading zeroes.  Do 'bsrl' to establish the index
                  of the highest set bit, and subtract that value from
                  31. */
               HReg tmp = newVRegI(env);
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, X86Instr_Bsfr32(False,src,tmp));
               addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                             X86RMI_Imm(31), dst));
               addInstr(env, X86Instr_Alu32R(Xalu_SUB,
                                             X86RMI_Reg(tmp), dst));
               return dst;
            }

            case Iop_CmpwNEZ32: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
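               /* Computes (src | -src) >>s 31.  The top bit of
                  src|-src is set iff src != 0, and the arithmetic
                  shift then smears it across all 32 bits, giving
                  0xFFFFFFFF for nonzero src and 0 otherwise. */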
               addInstr(env, mk_iMOVsd_RR(src,dst));
               addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
               addInstr(env, X86Instr_Alu32R(Xalu_OR,
                                             X86RMI_Reg(src), dst));
               addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
               return dst;
            }
            case Iop_Left8:
            case Iop_Left16:
            case Iop_Left32: {
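               /* Left(x) is defined as x | -x, which sets every bit
                  at and above the lowest set bit of x.  Only the low
                  8/16 bits matter for the narrow variants, so one
                  32-bit sequence serves all three. */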
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, mk_iMOVsd_RR(src, dst));
               addInstr(env, X86Instr_Unary32(Xun_NEG, dst));
               addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst));
               return dst;
            }

            case Iop_V128to32: {
               HReg dst = newVRegI(env);
               HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
               X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
               sub_from_esp(env, 16);
               addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
               addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
               add_to_esp(env, 16);
               return dst;
            }

            /* ReinterpF32asI32(e) */
            /* Given an IEEE754 single, produce an I32 with the same bit
               pattern.  Keep stack 8-aligned even though only using 4
               bytes. */
            case Iop_ReinterpF32asI32: {
               HReg rf = iselFltExpr(env, e->Iex.Unop.arg);
               HReg dst = newVRegI(env);
               X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
               /* paranoia */
               set_FPU_rounding_default(env);
               /* subl $8, %esp */
               sub_from_esp(env, 8);
               /* gstF %rf, 0(%esp) */
               addInstr(env,
                        X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp));
               /* movl 0(%esp), %dst */
               addInstr(env,
                        X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst));
               /* addl $8, %esp */
               add_to_esp(env, 8);
               return dst;
            }

            case Iop_16to8:
            case Iop_32to8:
            case Iop_32to16:
               /* These are no-ops. */
               return iselIntExpr_R(env, e->Iex.Unop.arg);

            case Iop_GetMSBs8x8: {
               /* Note: the following assumes the helper is of
                  signature
                     UInt fn ( ULong ), and is not a regparm fn.
               */
               HReg xLo, xHi;
               HReg dst = newVRegI(env);
               Addr fn = (Addr)h_generic_calc_GetMSBs8x8;
               iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
               addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
               addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
               addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
                                            0, mk_RetLoc_simple(RLPri_Int) ));
               add_to_esp(env, 2*4);
               addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
               return dst;
            }

            default:
               break;
         }
         break;
      }

      /* --------- GET --------- */
      case Iex_Get: {
         if (ty == Ity_I32) {
            HReg dst = newVRegI(env);
            addInstr(env, X86Instr_Alu32R(
                             Xalu_MOV,
                             X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
                                                    hregX86_EBP())),
                             dst));
            return dst;
         }
         if (ty == Ity_I8 || ty == Ity_I16) {
            HReg dst = newVRegI(env);
            addInstr(env, X86Instr_LoadEX(
                             toUChar(ty==Ity_I8 ? 1 : 2),
                             False,
                             X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
                             dst));
            return dst;
         }
         break;
      }

      case Iex_GetI: {
         X86AMode* am
            = genGuestArrayOffset(
                 env, e->Iex.GetI.descr,
                      e->Iex.GetI.ix, e->Iex.GetI.bias );
         HReg dst = newVRegI(env);
         if (ty == Ity_I8) {
            addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
            return dst;
         }
         if (ty == Ity_I32) {
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
            return dst;
         }
         break;
      }

      /* --------- CCALL --------- */
      case Iex_CCall: {
         HReg dst = newVRegI(env);
         vassert(ty == e->Iex.CCall.retty);

         /* be very restrictive for now.  Only 32/64-bit ints allowed for
            args, and 32 bits for return type.  Don't forget to change
            the RetLoc if more return types are allowed in future. */
         if (e->Iex.CCall.retty != Ity_I32)
            goto irreducible;

         /* Marshal args, do the call, clear stack. */
         UInt addToSp = 0;
         RetLoc rloc = mk_RetLoc_INVALID();
         doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
                       e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
         vassert(is_sane_RetLoc(rloc));
         vassert(rloc.pri == RLPri_Int);
         vassert(addToSp == 0);

         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
         return dst;
      }

      /* --------- LITERAL --------- */
      /* 32/16/8-bit literals */
      case Iex_Const: {
         X86RMI* rmi = iselIntExpr_RMI ( env, e );
         HReg r = newVRegI(env);
         addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r));
         return r;
      }

      /* --------- MULTIPLEX --------- */
      case Iex_ITE: { // VFD
         if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
             && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
            HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
            X86RM* r0 = iselIntExpr_RM(env, e->Iex.ITE.iffalse);
            HReg dst = newVRegI(env);
            addInstr(env, mk_iMOVsd_RR(r1,dst));
            X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
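            /* Flipping the bottom bit of an X86CondCode negates it
               (the codes follow the x86 encoding, where conditions
               pair up even/odd with their complements), so this cmov
               writes the iffalse value only when the condition does
               not hold. */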
            addInstr(env, X86Instr_CMov32(cc ^ 1, r0, dst));
            return dst;
         }
         break;
      }

      default:
         break;
   } /* switch (e->tag) */

   /* We get here if no pattern matched. */
  irreducible:
   ppIRExpr(e);
   vpanic("iselIntExpr_R: cannot reduce tree");
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expression auxiliaries ---*/
/*---------------------------------------------------------*/

/* --------------------- AMODEs --------------------- */

/* Return an AMode which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 32-bit one.
*/

static Bool sane_AMode ( X86AMode* am )
{
   switch (am->tag) {
      case Xam_IR:
         return
            toBool( hregClass(am->Xam.IR.reg) == HRcInt32
                    && (hregIsVirtual(am->Xam.IR.reg)
                        || sameHReg(am->Xam.IR.reg, hregX86_EBP())) );
      case Xam_IRRS:
         return
            toBool( hregClass(am->Xam.IRRS.base) == HRcInt32
                    && hregIsVirtual(am->Xam.IRRS.base)
                    && hregClass(am->Xam.IRRS.index) == HRcInt32
                    && hregIsVirtual(am->Xam.IRRS.index) );
      default:
         vpanic("sane_AMode: unknown x86 amode tag");
   }
}

static X86AMode* iselIntExpr_AMode ( ISelEnv* env, const IRExpr* e )
{
   X86AMode* am = iselIntExpr_AMode_wrk(env, e);
   vassert(sane_AMode(am));
   return am;
}

/* DO NOT CALL THIS DIRECTLY ! */
static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32);

   /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
       && e->Iex.Binop.arg1->tag == Iex_Binop
       && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop
       && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
       && e->Iex.Binop.arg1
           ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg1
           ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
      UInt shift = e->Iex.Binop.arg1
                    ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
      UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
      if (shift == 1 || shift == 2 || shift == 3) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1);
         HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1
                                       ->Iex.Binop.arg2->Iex.Binop.arg1 );
         return X86AMode_IRRS(imm32, r1, r2, shift);
      }
   }

   /* Add32(expr1, Shl32(expr2, imm)) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg2->tag == Iex_Binop
       && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
       && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
      UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
      if (shift == 1 || shift == 2 || shift == 3) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
         return X86AMode_IRRS(0, r1, r2, shift);
      }
   }

   /* Add32(expr,i) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add32
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1);
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   {
      HReg r1 = iselIntExpr_R(env, e);
      return X86AMode_IR(0, r1);
   }
}
1617
1618
1619 /* --------------------- RMIs --------------------- */
1620
1621 /* Similarly, calculate an expression into an X86RMI operand. As with
1622 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1623
iselIntExpr_RMI(ISelEnv * env,const IRExpr * e)1624 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, const IRExpr* e )
1625 {
1626 X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);
1627 /* sanity checks ... */
1628 switch (rmi->tag) {
1629 case Xrmi_Imm:
1630 return rmi;
1631 case Xrmi_Reg:
1632 vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
1633 vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
1634 return rmi;
1635 case Xrmi_Mem:
1636 vassert(sane_AMode(rmi->Xrmi.Mem.am));
1637 return rmi;
1638 default:
1639 vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
1640 }
1641 }
1642
1643 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RMI_wrk(ISelEnv * env,const IRExpr * e)1644 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e )
1645 {
1646 IRType ty = typeOfIRExpr(env->type_env,e);
1647 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1648
1649 /* special case: immediate */
1650 if (e->tag == Iex_Const) {
1651 UInt u;
1652 switch (e->Iex.Const.con->tag) {
1653 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1654 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1655 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1656 default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
1657 }
1658 return X86RMI_Imm(u);
1659 }
1660
1661 /* special case: 32-bit GET */
1662 if (e->tag == Iex_Get && ty == Ity_I32) {
1663 return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1664 hregX86_EBP()));
1665 }
1666
1667 /* special case: 32-bit load from memory */
1668 if (e->tag == Iex_Load && ty == Ity_I32
1669 && e->Iex.Load.end == Iend_LE) {
1670 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
1671 return X86RMI_Mem(am);
1672 }
1673
1674 /* default case: calculate into a register and return that */
1675 {
1676 HReg r = iselIntExpr_R ( env, e );
1677 return X86RMI_Reg(r);
1678 }
1679 }
1680
1681
1682 /* --------------------- RIs --------------------- */
1683
1684 /* Calculate an expression into an X86RI operand. As with
1685 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1686
iselIntExpr_RI(ISelEnv * env,const IRExpr * e)1687 static X86RI* iselIntExpr_RI ( ISelEnv* env, const IRExpr* e )
1688 {
1689 X86RI* ri = iselIntExpr_RI_wrk(env, e);
1690 /* sanity checks ... */
1691 switch (ri->tag) {
1692 case Xri_Imm:
1693 return ri;
1694 case Xri_Reg:
1695 vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
1696 vassert(hregIsVirtual(ri->Xri.Reg.reg));
1697 return ri;
1698 default:
1699 vpanic("iselIntExpr_RI: unknown x86 RI tag");
1700 }
1701 }
1702
1703 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RI_wrk(ISelEnv * env,const IRExpr * e)1704 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e )
1705 {
1706 IRType ty = typeOfIRExpr(env->type_env,e);
1707 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1708
1709 /* special case: immediate */
1710 if (e->tag == Iex_Const) {
1711 UInt u;
1712 switch (e->Iex.Const.con->tag) {
1713 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1714 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1715 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1716 default: vpanic("iselIntExpr_RI.Iex_Const(x86h)");
1717 }
1718 return X86RI_Imm(u);
1719 }
1720
1721 /* default case: calculate into a register and return that */
1722 {
1723 HReg r = iselIntExpr_R ( env, e );
1724 return X86RI_Reg(r);
1725 }
1726 }
1727
1728
1729 /* --------------------- RMs --------------------- */
1730
1731 /* Similarly, calculate an expression into an X86RM operand. As with
1732 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1733
1734 static X86RM* iselIntExpr_RM ( ISelEnv* env, const IRExpr* e )
1735 {
1736 X86RM* rm = iselIntExpr_RM_wrk(env, e);
1737 /* sanity checks ... */
1738 switch (rm->tag) {
1739 case Xrm_Reg:
1740 vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
1741 vassert(hregIsVirtual(rm->Xrm.Reg.reg));
1742 return rm;
1743 case Xrm_Mem:
1744 vassert(sane_AMode(rm->Xrm.Mem.am));
1745 return rm;
1746 default:
1747 vpanic("iselIntExpr_RM: unknown x86 RM tag");
1748 }
1749 }
1750
1751 /* DO NOT CALL THIS DIRECTLY ! */
1752 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e )
1753 {
1754 IRType ty = typeOfIRExpr(env->type_env,e);
1755 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1756
1757 /* special case: 32-bit GET */
1758 if (e->tag == Iex_Get && ty == Ity_I32) {
1759 return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
1760 hregX86_EBP()));
1761 }
1762
1763 /* special case: load from memory -- not handled here; loads simply
1764 fall through to the default case below. */
1765 /* default case: calculate into a register and return that */
1766 {
1767 HReg r = iselIntExpr_R ( env, e );
1768 return X86RM_Reg(r);
1769 }
1770 }
1771
1772
1773 /* --------------------- CONDCODE --------------------- */
1774
1775 /* Generate code to evaluate a bit-typed expression, returning the
1776 condition code which would correspond if the expression had
1777 notionally returned 1. */
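/* For instance, given CmpLT32S(x,y) this emits a cmpl and returns
   Xcc_L; the caller is then free to consume that condition code in
   a jcc, setcc or cmov as appropriate. */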
1778
1779 static X86CondCode iselCondCode ( ISelEnv* env, const IRExpr* e )
1780 {
1781 /* Uh, there's nothing we can sanity check here, unfortunately. */
1782 return iselCondCode_wrk(env,e);
1783 }
1784
1785 /* DO NOT CALL THIS DIRECTLY ! */
1786 static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e )
1787 {
1788 MatchInfo mi;
1789
1790 vassert(e);
1791 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1792
1793 /* var */
1794 if (e->tag == Iex_RdTmp) {
1795 HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1796 /* Test32 doesn't modify r32, so this is OK. */
1797 addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
1798 return Xcc_NZ;
1799 }
1800
1801 /* Constant 1:Bit */
1802 if (e->tag == Iex_Const) {
1803 HReg r;
1804 vassert(e->Iex.Const.con->tag == Ico_U1);
1805 vassert(e->Iex.Const.con->Ico.U1 == True
1806 || e->Iex.Const.con->Ico.U1 == False);
1807 r = newVRegI(env);
1808 addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
1809 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
1810 return e->Iex.Const.con->Ico.U1 ? Xcc_Z : Xcc_NZ;
1811 }
1812
1813 /* Not1(e) */
1814 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1815 /* Generate code for the arg, and negate the test condition */
1816 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
1817 }
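   /* The XOR-with-1 trick works because the X86CondCode values
      follow the x86 condition-code encoding, in which conditions
      come in complementary pairs differing only in bit 0: for
      example Xcc_Z (0x4) ^ 1 == Xcc_NZ (0x5). */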
1818
1819 /* --- patterns rooted at: 32to1 --- */
1820
1821 if (e->tag == Iex_Unop
1822 && e->Iex.Unop.op == Iop_32to1) {
1823 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1824 addInstr(env, X86Instr_Test32(1,rm));
1825 return Xcc_NZ;
1826 }
1827
1828 /* --- patterns rooted at: CmpNEZ8 --- */
1829
1830 /* CmpNEZ8(x) */
1831 if (e->tag == Iex_Unop
1832 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1833 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1834 addInstr(env, X86Instr_Test32(0xFF,rm));
1835 return Xcc_NZ;
1836 }
1837
1838 /* --- patterns rooted at: CmpNEZ16 --- */
1839
1840 /* CmpNEZ16(x) */
1841 if (e->tag == Iex_Unop
1842 && e->Iex.Unop.op == Iop_CmpNEZ16) {
1843 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1844 addInstr(env, X86Instr_Test32(0xFFFF,rm));
1845 return Xcc_NZ;
1846 }
1847
1848 /* --- patterns rooted at: CmpNEZ32 --- */
1849
1850 /* CmpNEZ32(And32(x,y)) */
1851 {
1852 DECLARE_PATTERN(p_CmpNEZ32_And32);
1853 DEFINE_PATTERN(p_CmpNEZ32_And32,
1854 unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
1855 if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
1856 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
1857 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1858 HReg tmp = newVRegI(env);
1859 addInstr(env, mk_iMOVsd_RR(r0, tmp));
1860 addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
1861 return Xcc_NZ;
1862 }
1863 }
1864
1865 /* CmpNEZ32(Or32(x,y)) */
1866 {
1867 DECLARE_PATTERN(p_CmpNEZ32_Or32);
1868 DEFINE_PATTERN(p_CmpNEZ32_Or32,
1869 unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
1870 if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
1871 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
1872 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1873 HReg tmp = newVRegI(env);
1874 addInstr(env, mk_iMOVsd_RR(r0, tmp));
1875 addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
1876 return Xcc_NZ;
1877 }
1878 }
1879
1880 /* CmpNEZ32(GET(..):I32) */
1881 if (e->tag == Iex_Unop
1882 && e->Iex.Unop.op == Iop_CmpNEZ32
1883 && e->Iex.Unop.arg->tag == Iex_Get) {
1884 X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1885 hregX86_EBP());
1886 addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am));
1887 return Xcc_NZ;
1888 }
1889
1890 /* CmpNEZ32(x) */
1891 if (e->tag == Iex_Unop
1892 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1893 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1894 X86RMI* rmi2 = X86RMI_Imm(0);
1895 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
1896 return Xcc_NZ;
1897 }
1898
1899 /* --- patterns rooted at: CmpNEZ64 --- */
1900
1901 /* CmpNEZ64(Or64(x,y)) */
1902 {
1903 DECLARE_PATTERN(p_CmpNEZ64_Or64);
1904 DEFINE_PATTERN(p_CmpNEZ64_Or64,
1905 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
1906 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
1907 HReg hi1, lo1, hi2, lo2;
1908 HReg tmp = newVRegI(env);
1909 iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
1910 addInstr(env, mk_iMOVsd_RR(hi1, tmp));
1911 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
1912 iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
1913 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
1914 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
1915 return Xcc_NZ;
1916 }
1917 }
1918
1919 /* CmpNEZ64(x) */
1920 if (e->tag == Iex_Unop
1921 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1922 HReg hi, lo;
1923 HReg tmp = newVRegI(env);
1924 iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
1925 addInstr(env, mk_iMOVsd_RR(hi, tmp));
1926 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
1927 return Xcc_NZ;
1928 }
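   /* Both CmpNEZ64 cases above exploit the fact that a 64-bit value
      is nonzero exactly when the OR of its two 32-bit halves is
      nonzero: e.g. for 0x0000000100000000, hi|lo == 1 != 0. One
      32-bit OR therefore performs the whole comparison, leaving the
      answer in the Z flag. */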
1929
1930 /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */
1931
1932 /* CmpEQ8 / CmpNE8 */
1933 if (e->tag == Iex_Binop
1934 && (e->Iex.Binop.op == Iop_CmpEQ8
1935 || e->Iex.Binop.op == Iop_CmpNE8
1936 || e->Iex.Binop.op == Iop_CasCmpEQ8
1937 || e->Iex.Binop.op == Iop_CasCmpNE8)) {
1938 if (isZeroU8(e->Iex.Binop.arg2)) {
1939 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1940 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
1941 switch (e->Iex.Binop.op) {
1942 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1943 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1944 default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
1945 }
1946 } else {
1947 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1948 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1949 HReg r = newVRegI(env);
1950 addInstr(env, mk_iMOVsd_RR(r1,r));
1951 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1952 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
1953 switch (e->Iex.Binop.op) {
1954 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1955 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1956 default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
1957 }
1958 }
1959 }
1960
1961 /* CmpEQ16 / CmpNE16 */
1962 if (e->tag == Iex_Binop
1963 && (e->Iex.Binop.op == Iop_CmpEQ16
1964 || e->Iex.Binop.op == Iop_CmpNE16
1965 || e->Iex.Binop.op == Iop_CasCmpEQ16
1966 || e->Iex.Binop.op == Iop_CasCmpNE16
1967 || e->Iex.Binop.op == Iop_ExpCmpNE16)) {
1968 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1969 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1970 HReg r = newVRegI(env);
1971 addInstr(env, mk_iMOVsd_RR(r1,r));
1972 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1973 addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
1974 switch (e->Iex.Binop.op) {
1975 case Iop_CmpEQ16: case Iop_CasCmpEQ16:
1976 return Xcc_Z;
1977 case Iop_CmpNE16: case Iop_CasCmpNE16: case Iop_ExpCmpNE16:
1978 return Xcc_NZ;
1979 default:
1980 vpanic("iselCondCode(x86): CmpXX16");
1981 }
1982 }
1983
1984 /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation).
1985 Saves a "movl %eax, %tmp" compared to the default route. */
1986 if (e->tag == Iex_Binop
1987 && e->Iex.Binop.op == Iop_CmpNE32
1988 && e->Iex.Binop.arg1->tag == Iex_CCall
1989 && e->Iex.Binop.arg2->tag == Iex_Const) {
1990 IRExpr* cal = e->Iex.Binop.arg1;
1991 IRExpr* con = e->Iex.Binop.arg2;
1992 /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
1993 vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
1994 vassert(con->Iex.Const.con->tag == Ico_U32);
1995 /* Marshal args, do the call. */
1996 UInt addToSp = 0;
1997 RetLoc rloc = mk_RetLoc_INVALID();
1998 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1999 cal->Iex.CCall.cee,
2000 cal->Iex.CCall.retty, cal->Iex.CCall.args );
2001 vassert(is_sane_RetLoc(rloc));
2002 vassert(rloc.pri == RLPri_Int);
2003 vassert(addToSp == 0);
2004 /* */
2005 addInstr(env, X86Instr_Alu32R(Xalu_CMP,
2006 X86RMI_Imm(con->Iex.Const.con->Ico.U32),
2007 hregX86_EAX()));
2008 return Xcc_NZ;
2009 }
2010
2011 /* Cmp*32*(x,y) */
2012 if (e->tag == Iex_Binop
2013 && (e->Iex.Binop.op == Iop_CmpEQ32
2014 || e->Iex.Binop.op == Iop_CmpNE32
2015 || e->Iex.Binop.op == Iop_CmpLT32S
2016 || e->Iex.Binop.op == Iop_CmpLT32U
2017 || e->Iex.Binop.op == Iop_CmpLE32S
2018 || e->Iex.Binop.op == Iop_CmpLE32U
2019 || e->Iex.Binop.op == Iop_CasCmpEQ32
2020 || e->Iex.Binop.op == Iop_CasCmpNE32
2021 || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
2022 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2023 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2024 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
2025 switch (e->Iex.Binop.op) {
2026 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
2027 case Iop_CmpNE32:
2028 case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Xcc_NZ;
2029 case Iop_CmpLT32S: return Xcc_L;
2030 case Iop_CmpLT32U: return Xcc_B;
2031 case Iop_CmpLE32S: return Xcc_LE;
2032 case Iop_CmpLE32U: return Xcc_BE;
2033 default: vpanic("iselCondCode(x86): CmpXX32");
2034 }
2035 }
2036
2037 /* CmpNE64 */
2038 if (e->tag == Iex_Binop
2039 && (e->Iex.Binop.op == Iop_CmpNE64
2040 || e->Iex.Binop.op == Iop_CmpEQ64)) {
2041 HReg hi1, hi2, lo1, lo2;
2042 HReg tHi = newVRegI(env);
2043 HReg tLo = newVRegI(env);
2044 iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
2045 iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
2046 addInstr(env, mk_iMOVsd_RR(hi1, tHi));
2047 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
2048 addInstr(env, mk_iMOVsd_RR(lo1, tLo));
2049 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
2050 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
2051 switch (e->Iex.Binop.op) {
2052 case Iop_CmpNE64: return Xcc_NZ;
2053 case Iop_CmpEQ64: return Xcc_Z;
2054 default: vpanic("iselCondCode(x86): CmpXX64");
2055 }
2056 }
2057
2058 ppIRExpr(e);
2059 vpanic("iselCondCode");
2060 }
2061
2062
2063 /*---------------------------------------------------------*/
2064 /*--- ISEL: Integer expressions (64 bit) ---*/
2065 /*---------------------------------------------------------*/
2066
2067 /* Compute a 64-bit value into a register pair, which is returned as
2068 the first two parameters. As with iselIntExpr_R, these may be
2069 either real or virtual regs; in any case they must not be changed
2070 by subsequent code emitted by the caller. */
2071
2072 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
2073 const IRExpr* e )
2074 {
2075 iselInt64Expr_wrk(rHi, rLo, env, e);
2076 # if 0
2077 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2078 # endif
2079 vassert(hregClass(*rHi) == HRcInt32);
2080 vassert(hregIsVirtual(*rHi));
2081 vassert(hregClass(*rLo) == HRcInt32);
2082 vassert(hregIsVirtual(*rLo));
2083 }
2084
2085 /* DO NOT CALL THIS DIRECTLY ! */
2086 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
2087 const IRExpr* e )
2088 {
2089 MatchInfo mi;
2090 HWord fn = 0; /* helper fn for most SIMD64 stuff */
2091 vassert(e);
2092 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
2093
2094 /* 64-bit literal */
2095 if (e->tag == Iex_Const) {
2096 ULong w64 = e->Iex.Const.con->Ico.U64;
2097 UInt wHi = toUInt(w64 >> 32);
2098 UInt wLo = toUInt(w64);
2099 HReg tLo = newVRegI(env);
2100 HReg tHi = newVRegI(env);
2101 vassert(e->Iex.Const.con->tag == Ico_U64);
2102 if (wLo == wHi) {
2103 /* Save a precious Int register in this special case. */
2104 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2105 *rHi = tLo;
2106 *rLo = tLo;
2107 } else {
2108 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
2109 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2110 *rHi = tHi;
2111 *rLo = tLo;
2112 }
2113 return;
2114 }
2115
2116 /* read 64-bit IRTemp */
2117 if (e->tag == Iex_RdTmp) {
2118 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
2119 return;
2120 }
2121
2122 /* 64-bit load */
2123 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2124 HReg tLo, tHi;
2125 X86AMode *am0, *am4;
2126 vassert(e->Iex.Load.ty == Ity_I64);
2127 tLo = newVRegI(env);
2128 tHi = newVRegI(env);
2129 am0 = iselIntExpr_AMode(env, e->Iex.Load.addr);
2130 am4 = advance4(am0);
2131 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
2132 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2133 *rHi = tHi;
2134 *rLo = tLo;
2135 return;
2136 }
2137
2138 /* 64-bit GET */
2139 if (e->tag == Iex_Get) {
2140 X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
2141 X86AMode* am4 = advance4(am);
2142 HReg tLo = newVRegI(env);
2143 HReg tHi = newVRegI(env);
2144 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2145 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2146 *rHi = tHi;
2147 *rLo = tLo;
2148 return;
2149 }
2150
2151 /* 64-bit GETI */
2152 if (e->tag == Iex_GetI) {
2153 X86AMode* am
2154 = genGuestArrayOffset( env, e->Iex.GetI.descr,
2155 e->Iex.GetI.ix, e->Iex.GetI.bias );
2156 X86AMode* am4 = advance4(am);
2157 HReg tLo = newVRegI(env);
2158 HReg tHi = newVRegI(env);
2159 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2160 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2161 *rHi = tHi;
2162 *rLo = tLo;
2163 return;
2164 }
2165
2166 /* 64-bit ITE: ITE(g, expr, expr) */ // VFD
2167 if (e->tag == Iex_ITE) {
2168 HReg e0Lo, e0Hi, e1Lo, e1Hi;
2169 HReg tLo = newVRegI(env);
2170 HReg tHi = newVRegI(env);
2171 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse);
2172 iselInt64Expr(&e1Hi, &e1Lo, env, e->Iex.ITE.iftrue);
2173 addInstr(env, mk_iMOVsd_RR(e1Hi, tHi));
2174 addInstr(env, mk_iMOVsd_RR(e1Lo, tLo));
2175 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
2176 /* This assumes the first cmov32 doesn't trash the condition
2177 codes, so they are still available for the second cmov32 */
2178 addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Hi), tHi));
2179 addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Lo), tLo));
2180 *rHi = tHi;
2181 *rLo = tLo;
2182 return;
2183 }
2184
2185 /* --------- BINARY ops --------- */
2186 if (e->tag == Iex_Binop) {
2187 switch (e->Iex.Binop.op) {
2188 /* 32 x 32 -> 64 multiply */
2189 case Iop_MullU32:
2190 case Iop_MullS32: {
2191 /* get one operand into %eax, and the other into an R/M.
2192 Need to make an educated guess about which operand is
2193 better placed in which. */
2194 HReg tLo = newVRegI(env);
2195 HReg tHi = newVRegI(env);
2196 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32);
2197 X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2198 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2199 addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX()));
2200 addInstr(env, X86Instr_MulL(syned, rmLeft));
2201 /* Result is now in EDX:EAX. Tell the caller. */
2202 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2203 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2204 *rHi = tHi;
2205 *rLo = tLo;
2206 return;
2207 }
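         /* This relies on the one-operand forms of x86 multiply:
            (i)mull r/m32 computes the full 64-bit product of %eax
            and the r/m operand into %edx:%eax, which is why one
            operand is steered into %eax above and the result is
            copied out of %edx:%eax afterwards. */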
2208
2209 /* 64 x 32 -> (32(rem),32(div)) division */
2210 case Iop_DivModU64to32:
2211 case Iop_DivModS64to32: {
2212 /* Get the 64-bit operand into edx:eax, and the other into
2213 any old R/M. */
2214 HReg sHi, sLo;
2215 HReg tLo = newVRegI(env);
2216 HReg tHi = newVRegI(env);
2217 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
2218 X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2219 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2220 addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX()));
2221 addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX()));
2222 addInstr(env, X86Instr_Div(syned, rmRight));
2223 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2224 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2225 *rHi = tHi;
2226 *rLo = tLo;
2227 return;
2228 }
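         /* Similarly, (i)divl r/m32 divides the 64-bit value in
            %edx:%eax by the r/m operand, leaving the quotient in
            %eax and the remainder in %edx -- matching the
            (32(rem),32(div)) result pairing returned as tHi:tLo. */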
2229
2230 /* Or64/And64/Xor64 */
2231 case Iop_Or64:
2232 case Iop_And64:
2233 case Iop_Xor64: {
2234 HReg xLo, xHi, yLo, yHi;
2235 HReg tLo = newVRegI(env);
2236 HReg tHi = newVRegI(env);
2237 X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
2238 : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
2239 : Xalu_XOR;
2240 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2241 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2242 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2243 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
2244 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2245 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
2246 *rHi = tHi;
2247 *rLo = tLo;
2248 return;
2249 }
2250
2251 /* Add64/Sub64 */
2252 case Iop_Add64:
2253 if (e->Iex.Binop.arg2->tag == Iex_Const) {
2254 /* special case Add64(e, const) */
2255 ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2256 UInt wHi = toUInt(w64 >> 32);
2257 UInt wLo = toUInt(w64);
2258 HReg tLo = newVRegI(env);
2259 HReg tHi = newVRegI(env);
2260 HReg xLo, xHi;
2261 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64);
2262 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2263 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2264 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2265 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo));
2266 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi));
2267 *rHi = tHi;
2268 *rLo = tLo;
2269 return;
2270 }
2271 /* else fall through to the generic case */
2272 case Iop_Sub64: {
2273 HReg xLo, xHi, yLo, yHi;
2274 HReg tLo = newVRegI(env);
2275 HReg tHi = newVRegI(env);
2276 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2277 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2278 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2279 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2280 if (e->Iex.Binop.op==Iop_Add64) {
2281 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
2282 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
2283 } else {
2284 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2285 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2286 }
2287 *rHi = tHi;
2288 *rLo = tLo;
2289 return;
2290 }
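      /* Worked example of the ADD/ADC carry chain: adding 1 to
         0x00000000FFFFFFFF does addl 0xFFFFFFFF + 1 = 0 with CF=1,
         then adcl 0 + 0 + CF = 1, giving the correct result
         0x0000000100000000. */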
2291
2292 /* 32HLto64(e1,e2) */
2293 case Iop_32HLto64:
2294 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2295 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2296 return;
2297
2298 /* 64-bit shifts */
2299 case Iop_Shl64: {
2300 /* We use the same ingenious scheme as gcc. Put the value
2301 to be shifted into %hi:%lo, and the shift amount into
2302 %cl. Then (dsts on right, a la ATT syntax):
2303
2304 shldl %cl, %lo, %hi -- make %hi be right for the
2305 -- shift amt %cl % 32
2306 shll %cl, %lo -- make %lo be right for the
2307 -- shift amt %cl % 32
2308
2309 Now, if (shift amount % 64) is in the range 32 .. 63,
2310 we have to do a fixup, which puts the result low half
2311 into the result high half, and zeroes the low half:
2312
2313 testl $32, %ecx
2314
2315 cmovnz %lo, %hi
2316 movl $0, %tmp -- sigh; need yet another reg
2317 cmovnz %tmp, %lo
2318 */
2319 HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2320 tLo = newVRegI(env);
2321 tHi = newVRegI(env);
2322 tTemp = newVRegI(env);
2323 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2324 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2325 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2326 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2327 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2328 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2329 and those regs are legitimately modifiable. */
2330 addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
2331 addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
2332 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2333 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
2334 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2335 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
2336 *rHi = tHi;
2337 *rLo = tLo;
2338 return;
2339 }
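      /* Worked example for a shift amount of 40: shldl/shll shift by
         40 % 32 == 8, which alone would be wrong; but since
         40 & 32 != 0 the testl/cmovnz fixup fires, moving the low
         half (already shifted left by 8) into the high half and
         zeroing the low half -- the correct Shl64 result. */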
2340
2341 case Iop_Shr64: {
2342 /* We use the same ingenious scheme as gcc. Put the value
2343 to be shifted into %hi:%lo, and the shift amount into
2344 %cl. Then:
2345
2346 shrdl %cl, %hi, %lo -- make %lo be right for the
2347 -- shift amt %cl % 32
2348 shrl %cl, %hi -- make %hi be right for the
2349 -- shift amt %cl % 32
2350
2351 Now, if (shift amount % 64) is in the range 32 .. 63,
2352 we have to do a fixup, which puts the result high half
2353 into the result low half, and zeroes the high half:
2354
2355 testl $32, %ecx
2356
2357 cmovnz %hi, %lo
2358 movl $0, %tmp -- sigh; need yet another reg
2359 cmovnz %tmp, %hi
2360 */
2361 HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2362 tLo = newVRegI(env);
2363 tHi = newVRegI(env);
2364 tTemp = newVRegI(env);
2365 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2366 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2367 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2368 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2369 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2370 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2371 and those regs are legitimately modifiable. */
2372 addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
2373 addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
2374 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2375 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
2376 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2377 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
2378 *rHi = tHi;
2379 *rLo = tLo;
2380 return;
2381 }
2382
2383 case Iop_Sar64: {
2384 /* gcc -O2 does the following. I don't know how it works, but it
2385 does work. Don't mess with it. This is hard to test because the
2386 x86 front end doesn't create Iop_Sar64 for any x86 instruction,
2387 so it's impossible to write a test program that feeds values
2388 through Iop_Sar64 and prints their results. The implementation
2389 here was tested by using psrlq on mmx registers -- that generates
2390 Iop_Shr64 -- and temporarily hacking the front end to generate
2391 Iop_Sar64 for that instruction instead.
2392
2393 movl %amount, %ecx
2394 movl %srcHi, %r1
2395 movl %srcLo, %r2
2396
2397 movl %r1, %r3
2398 sarl %cl, %r3
2399 movl %r2, %r4
2400 shrdl %cl, %r1, %r4
2401 movl %r3, %r2
2402 sarl $31, %r2
2403 andl $32, %ecx
2404 cmovne %r3, %r4 // = resLo
2405 cmovne %r2, %r3 // = resHi
2406 */
2407 HReg amount = iselIntExpr_R(env, e->Iex.Binop.arg2);
2408 HReg srcHi = INVALID_HREG, srcLo = INVALID_HREG;
2409 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Binop.arg1);
2410 HReg r1 = newVRegI(env);
2411 HReg r2 = newVRegI(env);
2412 HReg r3 = newVRegI(env);
2413 HReg r4 = newVRegI(env);
2414 addInstr(env, mk_iMOVsd_RR(amount, hregX86_ECX()));
2415 addInstr(env, mk_iMOVsd_RR(srcHi, r1));
2416 addInstr(env, mk_iMOVsd_RR(srcLo, r2));
2417
2418 addInstr(env, mk_iMOVsd_RR(r1, r3));
2419 addInstr(env, X86Instr_Sh32(Xsh_SAR, 0/*%cl*/, r3));
2420 addInstr(env, mk_iMOVsd_RR(r2, r4));
2421 addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, r1, r4));
2422 addInstr(env, mk_iMOVsd_RR(r3, r2));
2423 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, r2));
2424 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(32),
2425 hregX86_ECX()));
2426 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(r3), r4));
2427 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(r2), r3));
2428 *rHi = r3;
2429 *rLo = r4;
2430 return;
2431 }
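      /* A sketch (assuming signed >> is an arithmetic shift, as gcc
         guarantees) of what the above sequence computes:

            ULong Sar64 ( ULong x, UInt amt )  /* 0 <= amt <= 63 */
            {
               return (ULong)((Long)x >> amt);
            }
      */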
2432
2433 /* F64 -> I64 */
2434 /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2435 case. Unfortunately I see no easy way to avoid the
2436 duplication. */
2437 case Iop_F64toI64S: {
2438 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
2439 HReg tLo = newVRegI(env);
2440 HReg tHi = newVRegI(env);
2441
2442 /* Used several times ... */
2443 /* Careful ... this sharing is only safe because
2444 zero_esp/four_esp do not hold any registers which the
2445 register allocator could attempt to swizzle later. */
2446 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2447 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2448
2449 /* rf now holds the value to be converted; the rounding mode,
2450 encoded as per the IRRoundingMode enum, is supplied as
2451 arg1. The first thing to do is set the FPU's rounding
2452 mode accordingly. */
2453
2454 /* Create a space for the format conversion. */
2455 /* subl $8, %esp */
2456 sub_from_esp(env, 8);
2457
2458 /* Set host rounding mode */
2459 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2460
2461 /* gistll %rf, 0(%esp) */
2462 addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
2463
2464 /* movl 0(%esp), %dstLo */
2465 /* movl 4(%esp), %dstHi */
2466 addInstr(env, X86Instr_Alu32R(
2467 Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2468 addInstr(env, X86Instr_Alu32R(
2469 Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2470
2471 /* Restore default FPU rounding. */
2472 set_FPU_rounding_default( env );
2473
2474 /* addl $8, %esp */
2475 add_to_esp(env, 8);
2476
2477 *rHi = tHi;
2478 *rLo = tLo;
2479 return;
2480 }
2481
2482 case Iop_Add8x8:
2483 fn = (HWord)h_generic_calc_Add8x8; goto binnish;
2484 case Iop_Add16x4:
2485 fn = (HWord)h_generic_calc_Add16x4; goto binnish;
2486 case Iop_Add32x2:
2487 fn = (HWord)h_generic_calc_Add32x2; goto binnish;
2488
2489 case Iop_Avg8Ux8:
2490 fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
2491 case Iop_Avg16Ux4:
2492 fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;
2493
2494 case Iop_CmpEQ8x8:
2495 fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
2496 case Iop_CmpEQ16x4:
2497 fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
2498 case Iop_CmpEQ32x2:
2499 fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;
2500
2501 case Iop_CmpGT8Sx8:
2502 fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
2503 case Iop_CmpGT16Sx4:
2504 fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
2505 case Iop_CmpGT32Sx2:
2506 fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;
2507
2508 case Iop_InterleaveHI8x8:
2509 fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
2510 case Iop_InterleaveLO8x8:
2511 fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
2512 case Iop_InterleaveHI16x4:
2513 fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
2514 case Iop_InterleaveLO16x4:
2515 fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
2516 case Iop_InterleaveHI32x2:
2517 fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
2518 case Iop_InterleaveLO32x2:
2519 fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
2520 case Iop_CatOddLanes16x4:
2521 fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish;
2522 case Iop_CatEvenLanes16x4:
2523 fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish;
2524 case Iop_Perm8x8:
2525 fn = (HWord)h_generic_calc_Perm8x8; goto binnish;
2526
2527 case Iop_Max8Ux8:
2528 fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
2529 case Iop_Max16Sx4:
2530 fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
2531 case Iop_Min8Ux8:
2532 fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
2533 case Iop_Min16Sx4:
2534 fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;
2535
2536 case Iop_Mul16x4:
2537 fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
2538 case Iop_Mul32x2:
2539 fn = (HWord)h_generic_calc_Mul32x2; goto binnish;
2540 case Iop_MulHi16Sx4:
2541 fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
2542 case Iop_MulHi16Ux4:
2543 fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;
2544
2545 case Iop_QAdd8Sx8:
2546 fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
2547 case Iop_QAdd16Sx4:
2548 fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
2549 case Iop_QAdd8Ux8:
2550 fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
2551 case Iop_QAdd16Ux4:
2552 fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
2553
2554 case Iop_QNarrowBin32Sto16Sx4:
2555 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish;
2556 case Iop_QNarrowBin16Sto8Sx8:
2557 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish;
2558 case Iop_QNarrowBin16Sto8Ux8:
2559 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish;
2560 case Iop_NarrowBin16to8x8:
2561 fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish;
2562 case Iop_NarrowBin32to16x4:
2563 fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish;
2564
2565 case Iop_QSub8Sx8:
2566 fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
2567 case Iop_QSub16Sx4:
2568 fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
2569 case Iop_QSub8Ux8:
2570 fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish;
2571 case Iop_QSub16Ux4:
2572 fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish;
2573
2574 case Iop_Sub8x8:
2575 fn = (HWord)h_generic_calc_Sub8x8; goto binnish;
2576 case Iop_Sub16x4:
2577 fn = (HWord)h_generic_calc_Sub16x4; goto binnish;
2578 case Iop_Sub32x2:
2579 fn = (HWord)h_generic_calc_Sub32x2; goto binnish;
2580
2581 binnish: {
2582 /* Note: the following assumes all helpers are of
2583 signature
2584 ULong fn ( ULong, ULong ), and they are
2585 not marked as regparm functions.
2586 */
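         /* The four pushes below lay the two ULong args out in
            memory as, lowest address first: xLo, xHi, yLo, yHi --
            each 64-bit arg little-endian and x before y, which is
            the i386 cdecl stack layout for ULong fn(ULong, ULong).
            The add_to_esp(env, 4*4) afterwards removes them. */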
2587 HReg xLo, xHi, yLo, yHi;
2588 HReg tLo = newVRegI(env);
2589 HReg tHi = newVRegI(env);
2590 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2591 addInstr(env, X86Instr_Push(X86RMI_Reg(yHi)));
2592 addInstr(env, X86Instr_Push(X86RMI_Reg(yLo)));
2593 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2594 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2595 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2596 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2597 0, mk_RetLoc_simple(RLPri_2Int) ));
2598 add_to_esp(env, 4*4);
2599 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2600 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2601 *rHi = tHi;
2602 *rLo = tLo;
2603 return;
2604 }
2605
2606 case Iop_ShlN32x2:
2607 fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
2608 case Iop_ShlN16x4:
2609 fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
2610 case Iop_ShlN8x8:
2611 fn = (HWord)h_generic_calc_ShlN8x8; goto shifty;
2612 case Iop_ShrN32x2:
2613 fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
2614 case Iop_ShrN16x4:
2615 fn = (HWord)h_generic_calc_ShrN16x4; goto shifty;
2616 case Iop_SarN32x2:
2617 fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
2618 case Iop_SarN16x4:
2619 fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
2620 case Iop_SarN8x8:
2621 fn = (HWord)h_generic_calc_SarN8x8; goto shifty;
2622 shifty: {
2623 /* Note: the following assumes all helpers are of
2624 signature
2625 ULong fn ( ULong, UInt ), and they are
2626 not marked as regparm functions.
2627 */
2628 HReg xLo, xHi;
2629 HReg tLo = newVRegI(env);
2630 HReg tHi = newVRegI(env);
2631 X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2632 addInstr(env, X86Instr_Push(y));
2633 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2634 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2635 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2636 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2637 0, mk_RetLoc_simple(RLPri_2Int) ));
2638 add_to_esp(env, 3*4);
2639 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2640 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2641 *rHi = tHi;
2642 *rLo = tLo;
2643 return;
2644 }
2645
2646 default:
2647 break;
2648 }
2649 } /* if (e->tag == Iex_Binop) */
2650
2651
2652 /* --------- UNARY ops --------- */
2653 if (e->tag == Iex_Unop) {
2654 switch (e->Iex.Unop.op) {
2655
2656 /* 32Sto64(e) */
2657 case Iop_32Sto64: {
2658 HReg tLo = newVRegI(env);
2659 HReg tHi = newVRegI(env);
2660 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2661 addInstr(env, mk_iMOVsd_RR(src,tHi));
2662 addInstr(env, mk_iMOVsd_RR(src,tLo));
2663 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi));
2664 *rHi = tHi;
2665 *rLo = tLo;
2666 return;
2667 }
2668
2669 /* 32Uto64(e) */
2670 case Iop_32Uto64: {
2671 HReg tLo = newVRegI(env);
2672 HReg tHi = newVRegI(env);
2673 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2674 addInstr(env, mk_iMOVsd_RR(src,tLo));
2675 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2676 *rHi = tHi;
2677 *rLo = tLo;
2678 return;
2679 }
2680
2681 /* 16Uto64(e) */
2682 case Iop_16Uto64: {
2683 HReg tLo = newVRegI(env);
2684 HReg tHi = newVRegI(env);
2685 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2686 addInstr(env, mk_iMOVsd_RR(src,tLo));
2687 addInstr(env, X86Instr_Alu32R(Xalu_AND,
2688 X86RMI_Imm(0xFFFF), tLo));
2689 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2690 *rHi = tHi;
2691 *rLo = tLo;
2692 return;
2693 }
2694
2695 /* V128{HI}to64 */
2696 case Iop_V128HIto64:
2697 case Iop_V128to64: {
2698 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
2699 HReg tLo = newVRegI(env);
2700 HReg tHi = newVRegI(env);
2701 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
2702 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
2703 X86AMode* espLO = X86AMode_IR(off, hregX86_ESP());
2704 X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP());
2705 sub_from_esp(env, 16);
2706 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
2707 addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2708 X86RMI_Mem(espLO), tLo ));
2709 addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2710 X86RMI_Mem(espHI), tHi ));
2711 add_to_esp(env, 16);
2712 *rHi = tHi;
2713 *rLo = tLo;
2714 return;
2715 }
2716
2717 /* could do better than this, but for now ... */
2718 case Iop_1Sto64: {
2719 HReg tLo = newVRegI(env);
2720 HReg tHi = newVRegI(env);
2721 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2722 addInstr(env, X86Instr_Set32(cond,tLo));
2723 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo));
2724 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo));
2725 addInstr(env, mk_iMOVsd_RR(tLo, tHi));
2726 *rHi = tHi;
2727 *rLo = tLo;
2728 return;
2729 }
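      /* The SHL-31/SAR-31 pair above smears bit 0 of tLo across the
         whole register: 1 becomes 0xFFFFFFFF and 0 stays 0. Copying
         that word into tHi then completes the sign-extension to 64
         bits. */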
2730
2731 /* Not64(e) */
2732 case Iop_Not64: {
2733 HReg tLo = newVRegI(env);
2734 HReg tHi = newVRegI(env);
2735 HReg sHi, sLo;
2736 iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
2737 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2738 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2739 addInstr(env, X86Instr_Unary32(Xun_NOT,tHi));
2740 addInstr(env, X86Instr_Unary32(Xun_NOT,tLo));
2741 *rHi = tHi;
2742 *rLo = tLo;
2743 return;
2744 }
2745
2746 /* Left64(e) */
2747 case Iop_Left64: {
2748 HReg yLo, yHi;
2749 HReg tLo = newVRegI(env);
2750 HReg tHi = newVRegI(env);
2751 /* yHi:yLo = arg */
2752 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2753 /* tLo = 0 - yLo, and set carry */
2754 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo));
2755 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2756 /* tHi = 0 - yHi - carry */
2757 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2758 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2759 /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
2760 back in, so as to give the final result
2761 tHi:tLo = arg | -arg. */
2762 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo));
2763 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi));
2764 *rHi = tHi;
2765 *rLo = tLo;
2766 return;
2767 }
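      /* Left64 hence computes arg | -arg, which sets every bit at
         and above the lowest set bit of arg: for example
         Left64(8) == 0xFFFFFFFFFFFFFFF8, and Left64(0) == 0. */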
2768
2769 /* --- patterns rooted at: CmpwNEZ64 --- */
2770
2771 /* CmpwNEZ64(e) */
2772 case Iop_CmpwNEZ64: {
2773
2774 DECLARE_PATTERN(p_CmpwNEZ64_Or64);
2775 DEFINE_PATTERN(p_CmpwNEZ64_Or64,
2776 unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1))));
2777 if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) {
2778 /* CmpwNEZ64(Or64(x,y)) */
2779 HReg xHi,xLo,yHi,yLo;
2780 HReg xBoth = newVRegI(env);
2781 HReg merged = newVRegI(env);
2782 HReg tmp2 = newVRegI(env);
2783
2784 iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]);
2785 addInstr(env, mk_iMOVsd_RR(xHi,xBoth));
2786 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2787 X86RMI_Reg(xLo),xBoth));
2788
2789 iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]);
2790 addInstr(env, mk_iMOVsd_RR(yHi,merged));
2791 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2792 X86RMI_Reg(yLo),merged));
2793 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2794 X86RMI_Reg(xBoth),merged));
2795
2796 /* tmp2 = (merged | -merged) >>s 31 */
2797 addInstr(env, mk_iMOVsd_RR(merged,tmp2));
2798 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2799 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2800 X86RMI_Reg(merged), tmp2));
2801 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2802 *rHi = tmp2;
2803 *rLo = tmp2;
2804 return;
2805 } else {
2806 /* CmpwNEZ64(e) */
2807 HReg srcLo, srcHi;
2808 HReg tmp1 = newVRegI(env);
2809 HReg tmp2 = newVRegI(env);
2810 /* srcHi:srcLo = arg */
2811 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2812 /* tmp1 = srcHi | srcLo */
2813 addInstr(env, mk_iMOVsd_RR(srcHi,tmp1));
2814 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2815 X86RMI_Reg(srcLo), tmp1));
2816 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2817 addInstr(env, mk_iMOVsd_RR(tmp1,tmp2));
2818 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2819 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2820 X86RMI_Reg(tmp1), tmp2));
2821 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2822 *rHi = tmp2;
2823 *rLo = tmp2;
2824 return;
2825 }
2826 }
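      /* Both branches above rely on the same idiom: for a 32-bit w,
         w | -w has its sign bit set iff w != 0, so (w | -w) >>s 31
         is 0 when w == 0 and 0xFFFFFFFF otherwise -- exactly the
         all-zeroes/all-ones word which, duplicated into both halves,
         is the required CmpwNEZ64 result. */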
2827
2828 /* ReinterpF64asI64(e) */
2829 /* Given an IEEE754 double, produce an I64 with the same bit
2830 pattern. */
2831 case Iop_ReinterpF64asI64: {
2832 HReg rf = iselDblExpr(env, e->Iex.Unop.arg);
2833 HReg tLo = newVRegI(env);
2834 HReg tHi = newVRegI(env);
2835 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2836 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2837 /* paranoia */
2838 set_FPU_rounding_default(env);
2839 /* subl $8, %esp */
2840 sub_from_esp(env, 8);
2841 /* gstD %rf, 0(%esp) */
2842 addInstr(env,
2843 X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp));
2844 /* movl 0(%esp), %tLo */
2845 addInstr(env,
2846 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2847 /* movl 4(%esp), %tHi */
2848 addInstr(env,
2849 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2850 /* addl $8, %esp */
2851 add_to_esp(env, 8);
2852 *rHi = tHi;
2853 *rLo = tLo;
2854 return;
2855 }
2856
2857 case Iop_CmpNEZ32x2:
2858 fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish;
2859 case Iop_CmpNEZ16x4:
2860 fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish;
2861 case Iop_CmpNEZ8x8:
2862 fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish;
2863 unish: {
2864 /* Note: the following assumes all helpers are of
2865 signature
2866 ULong fn ( ULong ), and they are
2867 not marked as regparm functions.
2868 */
2869 HReg xLo, xHi;
2870 HReg tLo = newVRegI(env);
2871 HReg tHi = newVRegI(env);
2872 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
2873 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2874 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2875 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2876 0, mk_RetLoc_simple(RLPri_2Int) ));
2877 add_to_esp(env, 2*4);
2878 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2879 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2880 *rHi = tHi;
2881 *rLo = tLo;
2882 return;
2883 }
2884
2885 default:
2886 break;
2887 }
2888 } /* if (e->tag == Iex_Unop) */
2889
2890
2891 /* --------- CCALL --------- */
2892 if (e->tag == Iex_CCall) {
2893 HReg tLo = newVRegI(env);
2894 HReg tHi = newVRegI(env);
2895
2896 /* Marshal args, do the call, clear stack. */
2897 UInt addToSp = 0;
2898 RetLoc rloc = mk_RetLoc_INVALID();
2899 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2900 e->Iex.CCall.cee,
2901 e->Iex.CCall.retty, e->Iex.CCall.args );
2902 vassert(is_sane_RetLoc(rloc));
2903 vassert(rloc.pri == RLPri_2Int);
2904 vassert(addToSp == 0);
2905 /* */
2906
2907 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2908 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2909 *rHi = tHi;
2910 *rLo = tLo;
2911 return;
2912 }
2913
2914 ppIRExpr(e);
2915 vpanic("iselInt64Expr");
2916 }
2917
2918
2919 /*---------------------------------------------------------*/
2920 /*--- ISEL: Floating point expressions (32 bit) ---*/
2921 /*---------------------------------------------------------*/
2922
2923 /* Nothing interesting here; really just wrappers for
2924 64-bit stuff. */
2925
2926 static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e )
2927 {
2928 HReg r = iselFltExpr_wrk( env, e );
2929 # if 0
2930 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2931 # endif
2932 vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
2933 vassert(hregIsVirtual(r));
2934 return r;
2935 }
2936
2937 /* DO NOT CALL THIS DIRECTLY */
2938 static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e )
2939 {
2940 IRType ty = typeOfIRExpr(env->type_env,e);
2941 vassert(ty == Ity_F32);
2942
2943 if (e->tag == Iex_RdTmp) {
2944 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2945 }
2946
2947 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2948 X86AMode* am;
2949 HReg res = newVRegF(env);
2950 vassert(e->Iex.Load.ty == Ity_F32);
2951 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
2952 addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am));
2953 return res;
2954 }
2955
2956 if (e->tag == Iex_Binop
2957 && e->Iex.Binop.op == Iop_F64toF32) {
2958 /* Although the result is still held in a standard FPU register,
2959 we need to round it to reflect the loss of accuracy/range
2960 entailed in casting it to a 32-bit float. */
2961 HReg dst = newVRegF(env);
2962 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2963 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2964 addInstr(env, X86Instr_Fp64to32(src,dst));
2965 set_FPU_rounding_default( env );
2966 return dst;
2967 }
2968
2969 if (e->tag == Iex_Get) {
2970 X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
2971 hregX86_EBP() );
2972 HReg res = newVRegF(env);
2973 addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am ));
2974 return res;
2975 }
2976
2977 if (e->tag == Iex_Unop
2978 && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
2979 /* Given an I32, produce an IEEE754 float with the same bit
2980 pattern. */
2981 HReg dst = newVRegF(env);
2982 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
2983 /* paranoia */
2984 addInstr(env, X86Instr_Push(rmi));
2985 addInstr(env, X86Instr_FpLdSt(
2986 True/*load*/, 4, dst,
2987 X86AMode_IR(0, hregX86_ESP())));
2988 add_to_esp(env, 4);
2989 return dst;
2990 }
2991
2992 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
2993 HReg rf = iselFltExpr(env, e->Iex.Binop.arg2);
2994 HReg dst = newVRegF(env);
2995
2996 /* rf now holds the value to be rounded. The first thing to do
2997 is set the FPU's rounding mode accordingly. */
2998
2999 /* Set host rounding mode */
3000 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3001
3002 /* grndint %rf, %dst */
3003 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
3004
3005 /* Restore default FPU rounding. */
3006 set_FPU_rounding_default( env );
3007
3008 return dst;
3009 }
3010
3011 ppIRExpr(e);
3012 vpanic("iselFltExpr_wrk");
3013 }
3014
3015
3016 /*---------------------------------------------------------*/
3017 /*--- ISEL: Floating point expressions (64 bit) ---*/
3018 /*---------------------------------------------------------*/
3019
3020 /* Compute a 64-bit floating point value into a register, the identity
3021 of which is returned. As with iselIntExpr_R, the reg may be either
3022 real or virtual; in any case it must not be changed by subsequent
3023 code emitted by the caller. */
3024
3025 /* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
3026
3027 Type S (1 bit) E (11 bits) F (52 bits)
3028 ---- --------- ----------- -----------
3029 signalling NaN u 2047 (max) .0uuuuu---u
3030 (with at least
3031 one 1 bit)
3032 quiet NaN u 2047 (max) .1uuuuu---u
3033
3034 negative infinity 1 2047 (max) .000000---0
3035
3036 positive infinity 0 2047 (max) .000000---0
3037
3038 negative zero 1 0 .000000---0
3039
3040 positive zero 0 0 .000000---0
3041 */
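/* Handy reference points when reading the Iex_Const case below: the
   double 1.0 has bit pattern 0x3FF0000000000000 (S=0, E=1023, F=0),
   and -0.0 is 0x8000000000000000. */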
3042
3043 static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e )
3044 {
3045 HReg r = iselDblExpr_wrk( env, e );
3046 # if 0
3047 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3048 # endif
3049 vassert(hregClass(r) == HRcFlt64);
3050 vassert(hregIsVirtual(r));
3051 return r;
3052 }
3053
3054 /* DO NOT CALL THIS DIRECTLY */
3055 static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e )
3056 {
3057 IRType ty = typeOfIRExpr(env->type_env,e);
3058 vassert(e);
3059 vassert(ty == Ity_F64);
3060
3061 if (e->tag == Iex_RdTmp) {
3062 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3063 }
3064
3065 if (e->tag == Iex_Const) {
3066 union { UInt u32x2[2]; ULong u64; Double f64; } u;
3067 HReg freg = newVRegF(env);
3068 vassert(sizeof(u) == 8);
3069 vassert(sizeof(u.u64) == 8);
3070 vassert(sizeof(u.f64) == 8);
3071 vassert(sizeof(u.u32x2) == 8);
3072
3073 if (e->Iex.Const.con->tag == Ico_F64) {
3074 u.f64 = e->Iex.Const.con->Ico.F64;
3075 }
3076 else if (e->Iex.Const.con->tag == Ico_F64i) {
3077 u.u64 = e->Iex.Const.con->Ico.F64i;
3078 }
3079 else
3080 vpanic("iselDblExpr(x86): const");
3081
3082 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1])));
3083 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0])));
3084 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg,
3085 X86AMode_IR(0, hregX86_ESP())));
3086 add_to_esp(env, 8);
3087 return freg;
3088 }
3089
3090 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3091 X86AMode* am;
3092 HReg res = newVRegF(env);
3093 vassert(e->Iex.Load.ty == Ity_F64);
3094 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
3095 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am));
3096 return res;
3097 }
3098
3099 if (e->tag == Iex_Get) {
3100 X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
3101 hregX86_EBP() );
3102 HReg res = newVRegF(env);
3103 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
3104 return res;
3105 }
3106
3107 if (e->tag == Iex_GetI) {
3108 X86AMode* am
3109 = genGuestArrayOffset(
3110 env, e->Iex.GetI.descr,
3111 e->Iex.GetI.ix, e->Iex.GetI.bias );
3112 HReg res = newVRegF(env);
3113 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
3114 return res;
3115 }
3116
3117 if (e->tag == Iex_Triop) {
3118 X86FpOp fpop = Xfp_INVALID;
3119 IRTriop *triop = e->Iex.Triop.details;
3120 switch (triop->op) {
3121 case Iop_AddF64: fpop = Xfp_ADD; break;
3122 case Iop_SubF64: fpop = Xfp_SUB; break;
3123 case Iop_MulF64: fpop = Xfp_MUL; break;
3124 case Iop_DivF64: fpop = Xfp_DIV; break;
3125 case Iop_ScaleF64: fpop = Xfp_SCALE; break;
3126 case Iop_Yl2xF64: fpop = Xfp_YL2X; break;
3127 case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
3128 case Iop_AtanF64: fpop = Xfp_ATAN; break;
3129 case Iop_PRemF64: fpop = Xfp_PREM; break;
3130 case Iop_PRem1F64: fpop = Xfp_PREM1; break;
3131 default: break;
3132 }
3133 if (fpop != Xfp_INVALID) {
3134 HReg res = newVRegF(env);
3135 HReg srcL = iselDblExpr(env, triop->arg2);
3136 HReg srcR = iselDblExpr(env, triop->arg3);
3137 /* XXXROUNDINGFIXME */
3138 /* set roundingmode here */
3139 addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
3140 if (fpop != Xfp_ADD && fpop != Xfp_SUB
3141 && fpop != Xfp_MUL && fpop != Xfp_DIV)
3142 roundToF64(env, res);
3143 return res;
3144 }
3145 }
3146
3147 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
3148 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
3149 HReg dst = newVRegF(env);
3150
3151 /* rf now holds the value to be rounded. The first thing to do
3152 is set the FPU's rounding mode accordingly. */
3153
3154 /* Set host rounding mode */
3155 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3156
3157 /* grndint %rf, %dst */
3158 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
3159
3160 /* Restore default FPU rounding. */
3161 set_FPU_rounding_default( env );
3162
3163 return dst;
3164 }
3165
3166 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
3167 HReg dst = newVRegF(env);
3168 HReg rHi,rLo;
3169 iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2);
3170 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3171 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3172
3173 /* Set host rounding mode */
3174 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3175
3176 addInstr(env, X86Instr_FpLdStI(
3177 True/*load*/, 8, dst,
3178 X86AMode_IR(0, hregX86_ESP())));
3179
3180 /* Restore default FPU rounding. */
3181 set_FPU_rounding_default( env );
3182
3183 add_to_esp(env, 8);
3184 return dst;
3185 }
3186
3187 if (e->tag == Iex_Binop) {
3188 X86FpOp fpop = Xfp_INVALID;
3189 switch (e->Iex.Binop.op) {
3190 case Iop_SinF64: fpop = Xfp_SIN; break;
3191 case Iop_CosF64: fpop = Xfp_COS; break;
3192 case Iop_TanF64: fpop = Xfp_TAN; break;
3193 case Iop_2xm1F64: fpop = Xfp_2XM1; break;
3194 case Iop_SqrtF64: fpop = Xfp_SQRT; break;
3195 default: break;
3196 }
3197 if (fpop != Xfp_INVALID) {
3198 HReg res = newVRegF(env);
3199 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3200 /* XXXROUNDINGFIXME */
3201 /* set roundingmode here */
3202 /* Note that X86Instr_FpUnary(Xfp_TAN,..) sets the condition
3203 codes. I don't think that matters, since this insn
3204 selector never generates such an instruction intervening
3205 between a flag-setting instruction and a flag-using
3206 instruction. */
3207 addInstr(env, X86Instr_FpUnary(fpop,src,res));
3208 if (fpop != Xfp_SQRT
3209 && fpop != Xfp_NEG && fpop != Xfp_ABS)
3210 roundToF64(env, res);
3211 return res;
3212 }
3213 }
3214
3215 if (e->tag == Iex_Unop) {
3216 X86FpOp fpop = Xfp_INVALID;
3217 switch (e->Iex.Unop.op) {
3218 case Iop_NegF64: fpop = Xfp_NEG; break;
3219 case Iop_AbsF64: fpop = Xfp_ABS; break;
3220 default: break;
3221 }
3222 if (fpop != Xfp_INVALID) {
3223 HReg res = newVRegF(env);
3224 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3225 addInstr(env, X86Instr_FpUnary(fpop,src,res));
3226 /* No need to do roundToF64(env,res) for Xfp_NEG or Xfp_ABS,
3227 but might need to do that for other unary ops. */
3228 return res;
3229 }
3230 }
3231
3232 if (e->tag == Iex_Unop) {
3233 switch (e->Iex.Unop.op) {
3234 case Iop_I32StoF64: {
3235 HReg dst = newVRegF(env);
3236 HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg);
3237 addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
3238 set_FPU_rounding_default(env);
3239 addInstr(env, X86Instr_FpLdStI(
3240 True/*load*/, 4, dst,
3241 X86AMode_IR(0, hregX86_ESP())));
3242 add_to_esp(env, 4);
3243 return dst;
3244 }
3245 case Iop_ReinterpI64asF64: {
3246 /* Given an I64, produce an IEEE754 double with the same
3247 bit pattern. */
3248 HReg dst = newVRegF(env);
3249 HReg rHi, rLo;
3250 iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg);
3251 /* paranoia */
3252 set_FPU_rounding_default(env);
3253 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3254 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3255 addInstr(env, X86Instr_FpLdSt(
3256 True/*load*/, 8, dst,
3257 X86AMode_IR(0, hregX86_ESP())));
3258 add_to_esp(env, 8);
3259 return dst;
3260 }
3261 case Iop_F32toF64: {
3262 /* this is a no-op */
3263 HReg res = iselFltExpr(env, e->Iex.Unop.arg);
3264 return res;
3265 }
3266 default:
3267 break;
3268 }
3269 }
3270
3271 /* --------- MULTIPLEX --------- */
3272 if (e->tag == Iex_ITE) { // VFD
3273 if (ty == Ity_F64
3274 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
3275 HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
3276 HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
3277 HReg dst = newVRegF(env);
3278 addInstr(env, X86Instr_FpUnary(Xfp_MOV,r1,dst));
3279 X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
3280 addInstr(env, X86Instr_FpCMov(cc ^ 1, r0, dst));
3281 return dst;
3282 }
3283 }
3284
3285 ppIRExpr(e);
3286 vpanic("iselDblExpr_wrk");
3287 }
3288
3289
3290 /*---------------------------------------------------------*/
3291 /*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
3292 /*---------------------------------------------------------*/
3293
3294 static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e )
3295 {
3296 HReg r = iselVecExpr_wrk( env, e );
3297 # if 0
3298 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3299 # endif
3300 vassert(hregClass(r) == HRcVec128);
3301 vassert(hregIsVirtual(r));
3302 return r;
3303 }
3304
3305
3306 /* DO NOT CALL THIS DIRECTLY */
3307 static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e )
3308 {
3309
3310 # define REQUIRE_SSE1 \
3311 do { if (env->hwcaps == 0/*baseline, no sse*/ \
3312 || env->hwcaps == VEX_HWCAPS_X86_MMXEXT /*Integer SSE*/) \
3313 goto vec_fail; \
3314 } while (0)
3315
3316 # define REQUIRE_SSE2 \
3317 do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2)) \
3318 goto vec_fail; \
3319 } while (0)
3320
3321 # define SSE2_OR_ABOVE \
3322 (env->hwcaps & VEX_HWCAPS_X86_SSE2)
3323
3324 HWord fn = 0; /* address of helper fn, if required */
3325 MatchInfo mi;
3326 Bool arg1isEReg = False;
3327 X86SseOp op = Xsse_INVALID;
3328 IRType ty = typeOfIRExpr(env->type_env,e);
3329 vassert(e);
3330 vassert(ty == Ity_V128);
3331
3332 REQUIRE_SSE1;
3333
3334 if (e->tag == Iex_RdTmp) {
3335 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3336 }
3337
3338 if (e->tag == Iex_Get) {
3339 HReg dst = newVRegV(env);
3340 addInstr(env, X86Instr_SseLdSt(
3341 True/*load*/,
3342 dst,
3343 X86AMode_IR(e->Iex.Get.offset, hregX86_EBP())
3344 )
3345 );
3346 return dst;
3347 }
3348
3349 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3350 HReg dst = newVRegV(env);
3351 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
3352 addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
3353 return dst;
3354 }
3355
3356 if (e->tag == Iex_Const) {
3357 HReg dst = newVRegV(env);
3358 vassert(e->Iex.Const.con->tag == Ico_V128);
3359 addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
3360 return dst;
3361 }
3362
3363 if (e->tag == Iex_Unop) {
3364
3365 if (SSE2_OR_ABOVE) {
3366 /* 64UtoV128(LDle:I64(addr)) */
3367 DECLARE_PATTERN(p_zwiden_load64);
3368 DEFINE_PATTERN(p_zwiden_load64,
3369 unop(Iop_64UtoV128,
3370 IRExpr_Load(Iend_LE,Ity_I64,bind(0))));
3371 if (matchIRExpr(&mi, p_zwiden_load64, e)) {
3372 X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]);
3373 HReg dst = newVRegV(env);
3374 addInstr(env, X86Instr_SseLdzLO(8, dst, am));
3375 return dst;
3376 }
3377 }
3378
3379 switch (e->Iex.Unop.op) {
3380
3381 case Iop_NotV128: {
3382 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3383 return do_sse_Not128(env, arg);
3384 }
3385
3386 case Iop_CmpNEZ64x2: {
3387 /* We can use SSE2 instructions for this. */
3388 /* Ideally, we want to do a 64Ix2 comparison against zero of
3389 the operand. Problem is no such insn exists. Solution
3390 therefore is to do a 32Ix4 comparison instead, and bitwise-
3391 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and
3392 let the not'd result of this initial comparison be a:b:c:d.
3393 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use
3394 pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3395 giving the required result.
3396
3397 The required selection sequence is 2,3,0,1, which
3398 according to Intel's documentation means the pshufd
3399 literal value is 0xB1, that is,
3400 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3401 */
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg tmp = newVRegV(env);
            HReg dst = newVRegV(env);
            REQUIRE_SSE2;
            addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp));
            addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp));
            tmp = do_sse_Not128(env, tmp);
            addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst));
            addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
            return dst;
         }

         case Iop_CmpNEZ32x4: {
            /* Sigh, we have to generate lousy code since this has to
               work on SSE1 hosts */
            /* basically, the idea is: for each lane:
                  movl lane, %r ; negl %r   (now CF = lane==0 ? 0 : 1)
                  sbbl %r, %r               (now %r = 1Sto32(CF))
                  movl %r, lane
            */
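            /* Why this works: NEG sets CF to 0 iff its operand is zero,
               and SBB %r,%r computes %r - %r - CF = -CF.  Hence a zero
               lane yields 0x00000000 and any nonzero lane yields
               0xFFFFFFFF, which is exactly the 32-bit NEZ result. */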
            Int       i;
            X86AMode* am;
            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
            HReg      arg  = iselVecExpr(env, e->Iex.Unop.arg);
            HReg      dst  = newVRegV(env);
            HReg      r32  = newVRegI(env);
            sub_from_esp(env, 16);
            addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
            for (i = 0; i < 4; i++) {
               am = X86AMode_IR(i*4, hregX86_ESP());
               addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
               addInstr(env, X86Instr_Unary32(Xun_NEG, r32));
               addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
               addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
            }
            addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
            add_to_esp(env, 16);
            return dst;
         }

         case Iop_CmpNEZ8x16:
         case Iop_CmpNEZ16x8: {
            /* We can use SSE2 instructions for this. */
            HReg arg;
            HReg vec0 = newVRegV(env);
            HReg vec1 = newVRegV(env);
            HReg dst  = newVRegV(env);
            X86SseOp cmpOp
               = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16
                                                : Xsse_CMPEQ8;
            REQUIRE_SSE2;
            addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0));
            addInstr(env, mk_vMOVsd_RR(vec0, vec1));
            addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1));
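            /* vec1 holds all zero bits at this point; comparing it for
               FP equality with itself makes every 0.0 == 0.0 lane true,
               so this fills vec1 with all ones without needing to
               materialise a constant. */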
            /* defer the arg computation to here, so as to give the
               CMPEQF as long as possible to complete */
            arg = iselVecExpr(env, e->Iex.Unop.arg);
            /* vec0 is all 0s; vec1 is all 1s */
            addInstr(env, mk_vMOVsd_RR(arg, dst));
            /* 16x8 or 8x16 comparison == */
            addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst));
            /* invert result */
            addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
            return dst;
         }

         case Iop_RecipEst32Fx4: op = Xsse_RCPF;   goto do_32Fx4_unary;
         case Iop_RSqrtEst32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
         do_32Fx4_unary:
         {
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
            return dst;
         }

         case Iop_RecipEst32F0x4: op = Xsse_RCPF;   goto do_32F0x4_unary;
         case Iop_RSqrtEst32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary;
         case Iop_Sqrt32F0x4:     op = Xsse_SQRTF;  goto do_32F0x4_unary;
         do_32F0x4_unary:
         {
            /* A bit subtle.  We have to copy the arg to the result
               register first, because actually doing the SSE scalar insn
               leaves the upper 3/4 of the destination register
               unchanged.  Whereas the required semantics of these
               primops is that the upper 3/4 is simply copied in from the
               argument. */
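            /* So the sequence is: a whole-register vector move of arg
               into dst (supplying lanes 1..3), followed by the scalar
               op, which rewrites only lane 0 of dst. */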
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(arg, dst));
            addInstr(env, X86Instr_Sse32FLo(op, arg, dst));
            return dst;
         }

         case Iop_Sqrt64F0x2: op = Xsse_SQRTF; goto do_64F0x2_unary;
         do_64F0x2_unary:
         {
            /* A bit subtle.  We have to copy the arg to the result
               register first, because actually doing the SSE scalar insn
               leaves the upper half of the destination register
               unchanged.  Whereas the required semantics of these
               primops is that the upper half is simply copied in from the
               argument. */
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            REQUIRE_SSE2;
            addInstr(env, mk_vMOVsd_RR(arg, dst));
            addInstr(env, X86Instr_Sse64FLo(op, arg, dst));
            return dst;
         }

         case Iop_32UtoV128: {
            HReg      dst  = newVRegV(env);
            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
            X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Push(rmi));
            addInstr(env, X86Instr_SseLdzLO(4, dst, esp0));
            add_to_esp(env, 4);
            return dst;
         }

         case Iop_64UtoV128: {
            HReg      rHi, rLo;
            HReg      dst  = newVRegV(env);
            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
            iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
            addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
            addInstr(env, X86Instr_SseLdzLO(8, dst, esp0));
            add_to_esp(env, 8);
            return dst;
         }

         default:
            break;
      } /* switch (e->Iex.Unop.op) */
   } /* if (e->tag == Iex_Unop) */

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {

         case Iop_Sqrt64Fx2:
            REQUIRE_SSE2;
            /* fallthrough */
         case Iop_Sqrt32Fx4: {
            /* :: (rmode, vec) -> vec */
            HReg arg = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            /* XXXROUNDINGFIXME */
            /* set roundingmode here */
            addInstr(env, (e->Iex.Binop.op == Iop_Sqrt64Fx2
                              ? X86Instr_Sse64Fx2 : X86Instr_Sse32Fx4)
                          (Xsse_SQRTF, arg, dst));
            return dst;
         }

         case Iop_SetV128lo32: {
            HReg dst  = newVRegV(env);
            HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
            sub_from_esp(env, 16);
            addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
            addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0));
            addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
            add_to_esp(env, 16);
            return dst;
         }

         case Iop_SetV128lo64: {
            HReg dst  = newVRegV(env);
            HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg srcIhi, srcIlo;
            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
            X86AMode* esp4 = advance4(esp0);
            iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2);
            sub_from_esp(env, 16);
            addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
            addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0));
            addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4));
            addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
            add_to_esp(env, 16);
            return dst;
         }

         case Iop_64HLtoV128: {
            HReg r3, r2, r1, r0;
            X86AMode* esp0  = X86AMode_IR(0, hregX86_ESP());
            X86AMode* esp4  = advance4(esp0);
            X86AMode* esp8  = advance4(esp4);
            X86AMode* esp12 = advance4(esp8);
            HReg dst = newVRegV(env);
            /* do this via the stack (easy, convenient, etc) */
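            /* Little-endian stack layout after the four stores below:
               esp+0..7 hold arg2 (the less significant 64 bits of the
               result) and esp+8..15 hold arg1, so the vector load picks
               up arg1:arg2 as required. */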
            sub_from_esp(env, 16);
            /* Do the less significant 64 bits */
            iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
            addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0));
            addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4));
            /* Do the more significant 64 bits */
            iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
            addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8));
            addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12));
            /* Fetch result back from stack. */
            addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
            add_to_esp(env, 16);
            return dst;
         }

         case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
         case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
         case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
         case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4;
         case Iop_Max32Fx4:   op = Xsse_MAXF;   goto do_32Fx4;
         case Iop_Min32Fx4:   op = Xsse_MINF;   goto do_32Fx4;
         do_32Fx4:
         {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
            return dst;
         }

         case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
         case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
         case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
         case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2;
         case Iop_Max64Fx2:   op = Xsse_MAXF;   goto do_64Fx2;
         case Iop_Min64Fx2:   op = Xsse_MINF;   goto do_64Fx2;
         do_64Fx2:
         {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            REQUIRE_SSE2;
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
            return dst;
         }

         case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
         case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4;
         case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
         case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4;
         case Iop_Add32F0x4:   op = Xsse_ADDF;   goto do_32F0x4;
         case Iop_Div32F0x4:   op = Xsse_DIVF;   goto do_32F0x4;
         case Iop_Max32F0x4:   op = Xsse_MAXF;   goto do_32F0x4;
         case Iop_Min32F0x4:   op = Xsse_MINF;   goto do_32F0x4;
         case Iop_Mul32F0x4:   op = Xsse_MULF;   goto do_32F0x4;
         case Iop_Sub32F0x4:   op = Xsse_SUBF;   goto do_32F0x4;
         do_32F0x4: {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, X86Instr_Sse32FLo(op, argR, dst));
            return dst;
         }

         case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2;
         case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2;
         case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
         case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2;
         case Iop_Add64F0x2:   op = Xsse_ADDF;   goto do_64F0x2;
         case Iop_Div64F0x2:   op = Xsse_DIVF;   goto do_64F0x2;
         case Iop_Max64F0x2:   op = Xsse_MAXF;   goto do_64F0x2;
         case Iop_Min64F0x2:   op = Xsse_MINF;   goto do_64F0x2;
         case Iop_Mul64F0x2:   op = Xsse_MULF;   goto do_64F0x2;
         case Iop_Sub64F0x2:   op = Xsse_SUBF;   goto do_64F0x2;
         do_64F0x2: {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            REQUIRE_SSE2;
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, X86Instr_Sse64FLo(op, argR, dst));
            return dst;
         }

         case Iop_QNarrowBin32Sto16Sx8:
            op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
         case Iop_QNarrowBin16Sto8Sx16:
            op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
         case Iop_QNarrowBin16Sto8Ux16:
            op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;

         case Iop_InterleaveHI8x16:
            op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveHI16x8:
            op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveHI32x4:
            op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveHI64x2:
            op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;

         case Iop_InterleaveLO8x16:
            op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveLO16x8:
            op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveLO32x4:
            op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveLO64x2:
            op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;

         case Iop_AndV128:    op = Xsse_AND;      goto do_SseReRg;
         case Iop_OrV128:     op = Xsse_OR;       goto do_SseReRg;
         case Iop_XorV128:    op = Xsse_XOR;      goto do_SseReRg;
         case Iop_Add8x16:    op = Xsse_ADD8;     goto do_SseReRg;
         case Iop_Add16x8:    op = Xsse_ADD16;    goto do_SseReRg;
         case Iop_Add32x4:    op = Xsse_ADD32;    goto do_SseReRg;
         case Iop_Add64x2:    op = Xsse_ADD64;    goto do_SseReRg;
         case Iop_QAdd8Sx16:  op = Xsse_QADD8S;   goto do_SseReRg;
         case Iop_QAdd16Sx8:  op = Xsse_QADD16S;  goto do_SseReRg;
         case Iop_QAdd8Ux16:  op = Xsse_QADD8U;   goto do_SseReRg;
         case Iop_QAdd16Ux8:  op = Xsse_QADD16U;  goto do_SseReRg;
         case Iop_Avg8Ux16:   op = Xsse_AVG8U;    goto do_SseReRg;
         case Iop_Avg16Ux8:   op = Xsse_AVG16U;   goto do_SseReRg;
         case Iop_CmpEQ8x16:  op = Xsse_CMPEQ8;   goto do_SseReRg;
         case Iop_CmpEQ16x8:  op = Xsse_CMPEQ16;  goto do_SseReRg;
         case Iop_CmpEQ32x4:  op = Xsse_CMPEQ32;  goto do_SseReRg;
         case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S;  goto do_SseReRg;
         case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
         case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
         case Iop_Max16Sx8:   op = Xsse_MAX16S;   goto do_SseReRg;
         case Iop_Max8Ux16:   op = Xsse_MAX8U;    goto do_SseReRg;
         case Iop_Min16Sx8:   op = Xsse_MIN16S;   goto do_SseReRg;
         case Iop_Min8Ux16:   op = Xsse_MIN8U;    goto do_SseReRg;
         case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
         case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
         case Iop_Mul16x8:    op = Xsse_MUL16;    goto do_SseReRg;
         case Iop_Sub8x16:    op = Xsse_SUB8;     goto do_SseReRg;
         case Iop_Sub16x8:    op = Xsse_SUB16;    goto do_SseReRg;
         case Iop_Sub32x4:    op = Xsse_SUB32;    goto do_SseReRg;
         case Iop_Sub64x2:    op = Xsse_SUB64;    goto do_SseReRg;
         case Iop_QSub8Sx16:  op = Xsse_QSUB8S;   goto do_SseReRg;
         case Iop_QSub16Sx8:  op = Xsse_QSUB16S;  goto do_SseReRg;
         case Iop_QSub8Ux16:  op = Xsse_QSUB8U;   goto do_SseReRg;
         case Iop_QSub16Ux8:  op = Xsse_QSUB16U;  goto do_SseReRg;
         do_SseReRg: {
            HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR)
               REQUIRE_SSE2;
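            /* SSE reg-reg ops compute dst = dst OP src.  For the
               non-commutative pack/unpack cases above, the IR's first
               operand must end up as the instruction's source (E)
               operand, hence the arg1isEReg flag. */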
            if (arg1isEReg) {
               addInstr(env, mk_vMOVsd_RR(arg2, dst));
               addInstr(env, X86Instr_SseReRg(op, arg1, dst));
            } else {
               addInstr(env, mk_vMOVsd_RR(arg1, dst));
               addInstr(env, X86Instr_SseReRg(op, arg2, dst));
            }
            return dst;
         }

         case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
         case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
         case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
         case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
         case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
         case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
         case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
         case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
         do_SseShift: {
            HReg      greg = iselVecExpr(env, e->Iex.Binop.arg1);
            X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
            HReg      ereg = newVRegV(env);
            HReg      dst  = newVRegV(env);
            REQUIRE_SSE2;
            addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
            addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
            addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
            addInstr(env, X86Instr_Push(rmi));
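            /* The four pushes build a 16-byte value at %esp whose low
               32 bits are the shift amount and whose upper 96 bits are
               zero; the SSE shift-by-register forms take their count
               from the low quadword of the source register. */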
            addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
            addInstr(env, mk_vMOVsd_RR(greg, dst));
            addInstr(env, X86Instr_SseReRg(op, ereg, dst));
            add_to_esp(env, 16);
            return dst;
         }

         case Iop_NarrowBin32to16x8:
            fn = (HWord)h_generic_calc_NarrowBin32to16x8;
            goto do_SseAssistedBinary;
         case Iop_NarrowBin16to8x16:
            fn = (HWord)h_generic_calc_NarrowBin16to8x16;
            goto do_SseAssistedBinary;
         do_SseAssistedBinary: {
            /* As with the amd64 case (where this is copied from) we
               generate pretty bad code. */
            vassert(fn != 0);
            HReg dst  = newVRegV(env);
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg argp = newVRegI(env);
            /* subl $112, %esp         -- make a space */
            sub_from_esp(env, 112);
            /* leal 48(%esp), %r_argp  -- point into it */
            addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
                                         argp));
            /* andl $-16, %r_argp      -- 16-align the pointer */
            addInstr(env, X86Instr_Alu32R(Xalu_AND,
                                          X86RMI_Imm( ~(UInt)15 ),
                                          argp));
            /* Prepare 3 arg regs:
               leal 0(%r_argp), %eax
               leal 16(%r_argp), %edx
               leal 32(%r_argp), %ecx
            */
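            /* The aligned scratch area thus holds three 16-byte slots:
               the result at argp+0 and the two operands at argp+16 and
               argp+32.  The helper receives pointers to them in %eax,
               %edx and %ecx respectively, matching the 3-regparm
               convention requested in the X86Instr_Call below. */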
            addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
                                         hregX86_EAX()));
            addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
                                         hregX86_EDX()));
            addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
                                         hregX86_ECX()));
            /* Store the two args, at (%edx) and (%ecx):
               movupd %argL, 0(%edx)
               movupd %argR, 0(%ecx)
            */
            addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
                                           X86AMode_IR(0, hregX86_EDX())));
            addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
                                           X86AMode_IR(0, hregX86_ECX())));
            /* call the helper */
            addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
                                         3, mk_RetLoc_simple(RLPri_None) ));
            /* fetch the result from memory, using %r_argp, which the
               register allocator will keep alive across the call. */
            addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
                                           X86AMode_IR(0, argp)));
            /* and finally, clear the space */
            add_to_esp(env, 112);
            return dst;
         }

         default:
            break;
      } /* switch (e->Iex.Binop.op) */
   } /* if (e->tag == Iex_Binop) */


   if (e->tag == Iex_Triop) {
      IRTriop *triop = e->Iex.Triop.details;
      switch (triop->op) {

         case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4_w_rm;
         case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4_w_rm;
         case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4_w_rm;
         case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4_w_rm;
         do_32Fx4_w_rm:
         {
            HReg argL = iselVecExpr(env, triop->arg2);
            HReg argR = iselVecExpr(env, triop->arg3);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            /* XXXROUNDINGFIXME */
            /* set roundingmode here */
            addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
            return dst;
         }

         case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2_w_rm;
         case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2_w_rm;
         case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2_w_rm;
         case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2_w_rm;
         do_64Fx2_w_rm:
         {
            HReg argL = iselVecExpr(env, triop->arg2);
            HReg argR = iselVecExpr(env, triop->arg3);
            HReg dst = newVRegV(env);
            REQUIRE_SSE2;
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            /* XXXROUNDINGFIXME */
            /* set roundingmode here */
            addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
            return dst;
         }

         default:
            break;
      } /* switch (triop->op) */
   } /* if (e->tag == Iex_Triop) */


   if (e->tag == Iex_ITE) { // VFD
      HReg r1  = iselVecExpr(env, e->Iex.ITE.iftrue);
      HReg r0  = iselVecExpr(env, e->Iex.ITE.iffalse);
      HReg dst = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(r1,dst));
      X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
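      /* X86CondCode values mirror the hardware condition-code
         encodings, which come in complementary even/odd pairs, so
         cc ^ 1 denotes the negated condition: overwrite dst with r0
         only when the condition is false. */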
      addInstr(env, X86Instr_SseCMov(cc ^ 1, r0, dst));
      return dst;
   }

   vec_fail:
   vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
              LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
   ppIRExpr(e);
   vpanic("iselVecExpr_wrk");

#  undef REQUIRE_SSE1
#  undef REQUIRE_SSE2
#  undef SSE2_OR_ABOVE
}


/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/

static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }

   switch (stmt->tag) {

      /* --------- STORE --------- */
      case Ist_Store: {
         IRType    tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
         IRType    tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
         IREndness end = stmt->Ist.Store.end;

         if (tya != Ity_I32 || end != Iend_LE)
            goto stmt_fail;

         if (tyd == Ity_I32) {
            X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
            X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
            addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
            return;
         }
         if (tyd == Ity_I8 || tyd == Ity_I16) {
            X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
            HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
            addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
                                          r,am ));
            return;
         }
         if (tyd == Ity_F64) {
            X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
            HReg r = iselDblExpr(env, stmt->Ist.Store.data);
            addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
            return;
         }
         if (tyd == Ity_F32) {
            X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
            HReg r = iselFltExpr(env, stmt->Ist.Store.data);
            addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
            return;
         }
         if (tyd == Ity_I64) {
            HReg vHi, vLo, rA;
            iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
            rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
            addInstr(env, X86Instr_Alu32M(
                             Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
            addInstr(env, X86Instr_Alu32M(
                             Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
            return;
         }
         if (tyd == Ity_V128) {
            X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
            HReg r = iselVecExpr(env, stmt->Ist.Store.data);
            addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
            return;
         }
         break;
      }

      /* --------- PUT --------- */
      case Ist_Put: {
         IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
         if (ty == Ity_I32) {
            /* We're going to write to memory, so compute the RHS into an
               X86RI. */
            X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
            addInstr(env,
                     X86Instr_Alu32M(
                        Xalu_MOV,
                        ri,
                        X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
                     ));
            return;
         }
         if (ty == Ity_I8 || ty == Ity_I16) {
            HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
            addInstr(env, X86Instr_Store(
                             toUChar(ty==Ity_I8 ? 1 : 2),
                             r,
                             X86AMode_IR(stmt->Ist.Put.offset,
                                         hregX86_EBP())));
            return;
         }
         if (ty == Ity_I64) {
            HReg vHi, vLo;
            X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
            X86AMode* am4 = advance4(am);
            iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
            addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
            addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
            return;
         }
         if (ty == Ity_V128) {
            HReg      vec = iselVecExpr(env, stmt->Ist.Put.data);
            X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
            addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
            return;
         }
         if (ty == Ity_F32) {
            HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
            X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
            set_FPU_rounding_default(env); /* paranoia */
            addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
            return;
         }
         if (ty == Ity_F64) {
            HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
            X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
            set_FPU_rounding_default(env); /* paranoia */
            addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
            return;
         }
         break;
      }

      /* --------- Indexed PUT --------- */
      case Ist_PutI: {
         IRPutI *puti = stmt->Ist.PutI.details;

         X86AMode* am
            = genGuestArrayOffset(
                 env, puti->descr,
                      puti->ix, puti->bias );

         IRType ty = typeOfIRExpr(env->type_env, puti->data);
         if (ty == Ity_F64) {
            HReg val = iselDblExpr(env, puti->data);
            addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
            return;
         }
         if (ty == Ity_I8) {
            HReg r = iselIntExpr_R(env, puti->data);
            addInstr(env, X86Instr_Store( 1, r, am ));
            return;
         }
         if (ty == Ity_I32) {
            HReg r = iselIntExpr_R(env, puti->data);
            addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
            return;
         }
         if (ty == Ity_I64) {
            HReg rHi, rLo;
            X86AMode* am4 = advance4(am);
            iselInt64Expr(&rHi, &rLo, env, puti->data);
            addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
            addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
            return;
         }
         break;
      }

      /* --------- TMP --------- */
      case Ist_WrTmp: {
         IRTemp tmp = stmt->Ist.WrTmp.tmp;
         IRType ty = typeOfIRTemp(env->type_env, tmp);

         /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
            compute it into an AMode and then use LEA.  This usually
            produces fewer instructions, often because (for
            memcheck-created IR) we get t = address-expression with t
            used twice later on, so doing this naturally turns the
            address-expression back into an X86 amode. */
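         /* For example (illustrative only; the exact matching is up to
            iselIntExpr_AMode): t = Add32(t7, Shl32(t8, 2)) can become a
            single
               leal 0(%t7,%t8,4), %t
            instead of a separate shift and add. */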
         if (ty == Ity_I32
             && stmt->Ist.WrTmp.data->tag == Iex_Binop
             && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
            X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
            HReg dst = lookupIRTemp(env, tmp);
            if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
               /* Hmm, iselIntExpr_AMode wimped out and just computed the
                  value into a register.  Just emit a normal reg-reg move
                  so reg-alloc can coalesce it away in the usual way. */
               HReg src = am->Xam.IR.reg;
               addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
            } else {
               addInstr(env, X86Instr_Lea32(am,dst));
            }
            return;
         }

         if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
            X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
            HReg dst = lookupIRTemp(env, tmp);
            addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
            return;
         }
         if (ty == Ity_I64) {
            HReg rHi, rLo, dstHi, dstLo;
            iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
            lookupIRTemp64( &dstHi, &dstLo, env, tmp);
            addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
            addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
            return;
         }
         if (ty == Ity_I1) {
            X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
            HReg dst = lookupIRTemp(env, tmp);
            addInstr(env, X86Instr_Set32(cond, dst));
            return;
         }
         if (ty == Ity_F64) {
            HReg dst = lookupIRTemp(env, tmp);
            HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
            addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
            return;
         }
         if (ty == Ity_F32) {
            HReg dst = lookupIRTemp(env, tmp);
            HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
            addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
            return;
         }
         if (ty == Ity_V128) {
            HReg dst = lookupIRTemp(env, tmp);
            HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
            addInstr(env, mk_vMOVsd_RR(src,dst));
            return;
         }
         break;
      }

      /* --------- Call to DIRTY helper --------- */
      case Ist_Dirty: {
         IRDirty* d = stmt->Ist.Dirty.details;

         /* Figure out the return type, if any. */
         IRType retty = Ity_INVALID;
         if (d->tmp != IRTemp_INVALID)
            retty = typeOfIRTemp(env->type_env, d->tmp);

         Bool retty_ok = False;
         switch (retty) {
            case Ity_INVALID: /* function doesn't return anything */
            case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
            case Ity_V128:
               retty_ok = True; break;
            default:
               break;
         }
         if (!retty_ok)
            break; /* will go to stmt_fail: */

         /* Marshal args, do the call, and set the return value to
            0x555..555 if this is a conditional call that returns a value
            and the call is skipped. */
         UInt   addToSp = 0;
         RetLoc rloc    = mk_RetLoc_INVALID();
         doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
         vassert(is_sane_RetLoc(rloc));

         /* Now figure out what to do with the returned value, if any. */
         switch (retty) {
            case Ity_INVALID: {
               /* No return value.  Nothing to do. */
               vassert(d->tmp == IRTemp_INVALID);
               vassert(rloc.pri == RLPri_None);
               vassert(addToSp == 0);
               return;
            }
            case Ity_I32: case Ity_I16: case Ity_I8: {
               /* The returned value is in %eax.  Park it in the register
                  associated with tmp. */
               vassert(rloc.pri == RLPri_Int);
               vassert(addToSp == 0);
               HReg dst = lookupIRTemp(env, d->tmp);
               addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
               return;
            }
            case Ity_I64: {
               /* The returned value is in %edx:%eax.  Park it in the
                  register-pair associated with tmp. */
               vassert(rloc.pri == RLPri_2Int);
               vassert(addToSp == 0);
               HReg dstHi, dstLo;
               lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
               addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
               addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
               return;
            }
            case Ity_V128: {
               /* The returned value is on the stack, and *retloc tells
                  us where.  Fish it off the stack and then move the
                  stack pointer upwards to clear it, as directed by
                  doHelperCall. */
               vassert(rloc.pri == RLPri_V128SpRel);
               vassert(addToSp >= 16);
               HReg dst = lookupIRTemp(env, d->tmp);
               X86AMode* am = X86AMode_IR(rloc.spOff, hregX86_ESP());
               addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
               add_to_esp(env, addToSp);
               return;
            }
            default:
               /*NOTREACHED*/
               vassert(0);
         }
         break;
      }

      /* --------- MEM FENCE --------- */
      case Ist_MBE:
         switch (stmt->Ist.MBE.event) {
            case Imbe_Fence:
               addInstr(env, X86Instr_MFence(env->hwcaps));
               return;
            default:
               break;
         }
         break;

      /* --------- ACAS --------- */
      case Ist_CAS:
         if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
            /* "normal" singleton CAS */
            UChar  sz;
            IRCAS* cas = stmt->Ist.CAS.details;
            IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
            /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
            X86AMode* am = iselIntExpr_AMode(env, cas->addr);
            HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
            HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
            HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
            vassert(cas->expdHi == NULL);
            vassert(cas->dataHi == NULL);
            addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
            addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
            addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
            switch (ty) {
               case Ity_I32: sz = 4; break;
               case Ity_I16: sz = 2; break;
               case Ity_I8:  sz = 1; break;
               default: goto unhandled_cas;
            }
            addInstr(env, X86Instr_ACAS(am, sz));
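            /* lock cmpxchg: if mem equals %eax, ZF is set and mem is
               replaced by %ebx; otherwise ZF is clear and %eax receives
               the value read from memory.  So on failure (NZ) the cmov
               below replaces rOldLo (preloaded with the expected value)
               with what was actually observed. */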
            addInstr(env,
                     X86Instr_CMov32(Xcc_NZ,
                                     X86RM_Reg(hregX86_EAX()), rOldLo));
            return;
         } else {
            /* double CAS */
            IRCAS* cas = stmt->Ist.CAS.details;
            IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
            /* only 32-bit allowed in this case */
            /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
            /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
            X86AMode* am = iselIntExpr_AMode(env, cas->addr);
            HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
            HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
            HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
            HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
            HReg rOldHi  = lookupIRTemp(env, cas->oldHi);
            HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
            if (ty != Ity_I32)
               goto unhandled_cas;
            addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
            addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
            addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
            addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
            addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
            addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
            addInstr(env, X86Instr_DACAS(am));
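            /* lock cmpxchg8b: if the 64-bit value at mem equals
               %edx:%eax, ZF is set and mem is replaced by %ecx:%ebx;
               otherwise ZF is clear and %edx:%eax receive the value
               read from memory, which the two cmovs below then copy
               into rOldHi:rOldLo. */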
            addInstr(env,
                     X86Instr_CMov32(Xcc_NZ,
                                     X86RM_Reg(hregX86_EDX()), rOldHi));
            addInstr(env,
                     X86Instr_CMov32(Xcc_NZ,
                                     X86RM_Reg(hregX86_EAX()), rOldLo));
            return;
         }
         unhandled_cas:
         break;

      /* --------- INSTR MARK --------- */
      /* Doesn't generate any executable code ... */
      case Ist_IMark:
         return;

      /* --------- NO-OP --------- */
      /* Fairly self-explanatory, wouldn't you say? */
      case Ist_NoOp:
         return;

      /* --------- EXIT --------- */
      case Ist_Exit: {
         if (stmt->Ist.Exit.dst->tag != Ico_U32)
            vpanic("iselStmt(x86): Ist_Exit: dst is not a 32-bit value");

         X86CondCode cc    = iselCondCode(env, stmt->Ist.Exit.guard);
         X86AMode*   amEIP = X86AMode_IR(stmt->Ist.Exit.offsIP,
                                         hregX86_EBP());

         /* Case: boring transfer to known address */
         if (stmt->Ist.Exit.jk == Ijk_Boring) {
            if (env->chainingAllowed) {
               /* .. almost always true .. */
               /* Skip the event check at the dst if this is a forwards
                  edge. */
               Bool toFastEP
                  = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
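               /* A destination beyond max_ga (the highest guest address
                  in this superblock) is a forwards edge, so the jump
                  may target the fast entry point and skip the event
                  check there. */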
               if (0) vex_printf("%s", toFastEP ? "Y" : ",");
               addInstr(env, X86Instr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
                                              amEIP, cc, toFastEP));
            } else {
               /* .. very occasionally .. */
               /* We can't use chaining, so ask for an assisted transfer,
                  as that's the only alternative that is allowable. */
               HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
               addInstr(env, X86Instr_XAssisted(r, amEIP, cc, Ijk_Boring));
            }
            return;
         }

         /* Case: assisted transfer to arbitrary address */
         switch (stmt->Ist.Exit.jk) {
            /* Keep this list in sync with that in iselNext below */
            case Ijk_ClientReq:
            case Ijk_EmWarn:
            case Ijk_MapFail:
            case Ijk_NoDecode:
            case Ijk_NoRedir:
            case Ijk_SigSEGV:
            case Ijk_SigTRAP:
            case Ijk_Sys_int128:
            case Ijk_Sys_int129:
            case Ijk_Sys_int130:
            case Ijk_Sys_int145:
            case Ijk_Sys_int210:
            case Ijk_Sys_syscall:
            case Ijk_Sys_sysenter:
            case Ijk_InvalICache:
            case Ijk_Yield:
            {
               HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
               addInstr(env, X86Instr_XAssisted(r, amEIP, cc, stmt->Ist.Exit.jk));
               return;
            }
            default:
               break;
         }

         /* Do we ever expect to see any other kind? */
         goto stmt_fail;
      }

      default: break;
   }
   stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt");
}


/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      vassert(cdst->tag == Ico_U32);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr32)cdst->Ico.U32) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, X86Instr_XDirect(cdst->Ico.U32,
                                           amEIP, Xcc_ALWAYS,
                                           toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
                                             Ijk_Boring));
         }
         return;
      }
   }

   /* Case: call/return (==boring) transfer to any address */
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg      r     = iselIntExpr_R(env, next);
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         if (env->chainingAllowed) {
            addInstr(env, X86Instr_XIndir(r, amEIP, Xcc_ALWAYS));
         } else {
            addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
                                             Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }

   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_ClientReq:
      case Ijk_EmWarn:
      case Ijk_MapFail:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_SigSEGV:
      case Ijk_SigTRAP:
      case Ijk_Sys_int128:
      case Ijk_Sys_int129:
      case Ijk_Sys_int130:
      case Ijk_Sys_int145:
      case Ijk_Sys_int210:
      case Ijk_Sys_syscall:
      case Ijk_Sys_sysenter:
      case Ijk_InvalICache:
      case Ijk_Yield:
      {
         HReg      r     = iselIntExpr_R(env, next);
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, jk));
         return;
      }
      default:
         break;
   }

   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to x86 code. */

HInstrArray* iselSB_X86 ( const IRSB* bb,
                          VexArch arch_host,
                          const VexArchInfo* archinfo_host,
                          const VexAbiInfo* vbi/*UNUSED*/,
                          Int offs_Host_EvC_Counter,
                          Int offs_Host_EvC_FailAddr,
                          Bool chainingAllowed,
                          Bool addProfInc,
                          Addr max_ga )
{
   Int      i, j;
   HReg     hreg, hregHI;
   ISelEnv* env;
   UInt     hwcaps_host = archinfo_host->hwcaps;
   X86AMode *amCounter, *amFailAddr;

   /* sanity ... */
   vassert(arch_host == VexArchX86);
   vassert(0 == (hwcaps_host
                 & ~(VEX_HWCAPS_X86_MMXEXT
                     | VEX_HWCAPS_X86_SSE1
                     | VEX_HWCAPS_X86_SSE2
                     | VEX_HWCAPS_X86_SSE3
                     | VEX_HWCAPS_X86_LZCNT)));

   /* Check that the host's endianness is as expected. */
   vassert(archinfo_host->endness == VexEndnessLE);

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc_inline(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));

   /* and finally ... */
   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps_host;
   env->max_ga          = max_ga;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8:
         case Ity_I16:
         case Ity_I32:  hreg   = mkHReg(True, HRcInt32,  0, j++); break;
         case Ity_I64:  hreg   = mkHReg(True, HRcInt32,  0, j++);
                        hregHI = mkHReg(True, HRcInt32,  0, j++); break;
         case Ity_F32:
         case Ity_F64:  hreg   = mkHReg(True, HRcFlt64,  0, j++); break;
         case Ity_V128: hreg   = mkHReg(True, HRcVec128, 0, j++); break;
         default: ppIRType(bb->tyenv->types[i]);
                  vpanic("iselBB: IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
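   /* Note that a 64-bit IR temp therefore lives in a pair of 32-bit
      virtual registers: vregmapHI[i] holds the more significant half
      and vregmap[i] the less significant half. */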
   env->vreg_ctr = j;

   /* The very first instruction must be an event check. */
   amCounter  = X86AMode_IR(offs_Host_EvC_Counter,  hregX86_EBP());
   amFailAddr = X86AMode_IR(offs_Host_EvC_FailAddr, hregX86_EBP());
   addInstr(env, X86Instr_EvCheck(amCounter, amFailAddr));

   /* Possibly a block counter increment (for profiling).  At this
      point we don't know the address of the counter, so just pretend
      it is zero.  It will have to be patched later, but before this
      translation is used, by a call to LibVEX_patchProfCtr. */
   if (addProfInc) {
      addInstr(env, X86Instr_ProfInc());
   }

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                     host_x86_isel.c ---*/
/*---------------------------------------------------------------*/
