1 
2 /*---------------------------------------------------------------*/
3 /*--- begin                                   host_x86_isel.c ---*/
4 /*---------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2017 OpenWorks LLP
11       info@open-works.net
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26    02110-1301, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 
30    Neither the names of the U.S. Department of Energy nor the
31    University of California nor the names of its contributors may be
32    used to endorse or promote products derived from this software
33    without prior written permission.
34 */
35 
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
38 #include "libvex.h"
39 
40 #include "ir_match.h"
41 #include "main_util.h"
42 #include "main_globals.h"
43 #include "host_generic_regs.h"
44 #include "host_generic_simd64.h"
45 #include "host_generic_simd128.h"
46 #include "host_x86_defs.h"
47 
48 /* TODO 21 Apr 2005:
49 
50    -- (Really an assembler issue) don't emit CMov32 as a cmov
51       insn, since that's expensive on P4 and conditional branch
52       is cheaper if (as we expect) the condition is highly predictable
53 
54    -- preserve xmm registers across function calls (by declaring them
55       as trashed by call insns)
56 
57    -- preserve x87 ST stack discipline across function calls.  Sigh.
58 
59    -- Check doHelperCall: if a call is conditional, we cannot safely
60       compute any regparm args directly to registers.  Hence, the
61       fast-regparm marshalling should be restricted to unconditional
62       calls only.
63 */
64 
65 /*---------------------------------------------------------*/
66 /*--- x87 control word stuff                            ---*/
67 /*---------------------------------------------------------*/
68 
69 /* Vex-generated code expects to run with the FPU set as follows: all
70    exceptions masked, round-to-nearest, precision = 53 bits.  This
71    corresponds to a FPU control word value of 0x027F.
72 
73    Similarly the SSE control word (%mxcsr) should be 0x1F80.
74 
75    %fpucw and %mxcsr should have these values on entry to
76    Vex-generated code, and should those values should be
77    unchanged at exit.
78 */
79 
80 #define DEFAULT_FPUCW 0x027F
81 
82 /* debugging only, do not use */
83 /* define DEFAULT_FPUCW 0x037F */
84 
85 
86 /*---------------------------------------------------------*/
87 /*--- misc helpers                                      ---*/
88 /*---------------------------------------------------------*/
89 
90 /* These are duplicated in guest-x86/toIR.c */
unop(IROp op,IRExpr * a)91 static IRExpr* unop ( IROp op, IRExpr* a )
92 {
93    return IRExpr_Unop(op, a);
94 }
95 
binop(IROp op,IRExpr * a1,IRExpr * a2)96 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
97 {
98    return IRExpr_Binop(op, a1, a2);
99 }
100 
bind(Int binder)101 static IRExpr* bind ( Int binder )
102 {
103    return IRExpr_Binder(binder);
104 }
105 
isZeroU8(IRExpr * e)106 static Bool isZeroU8 ( IRExpr* e )
107 {
108    return e->tag == Iex_Const
109           && e->Iex.Const.con->tag == Ico_U8
110           && e->Iex.Const.con->Ico.U8 == 0;
111 }
112 
isZeroU32(IRExpr * e)113 static Bool isZeroU32 ( IRExpr* e )
114 {
115    return e->tag == Iex_Const
116           && e->Iex.Const.con->tag == Ico_U32
117           && e->Iex.Const.con->Ico.U32 == 0;
118 }
119 
120 //static Bool isZeroU64 ( IRExpr* e )
121 //{
122 //   return e->tag == Iex_Const
123 //          && e->Iex.Const.con->tag == Ico_U64
124 //          && e->Iex.Const.con->Ico.U64 == 0ULL;
125 //}
126 
127 
128 /*---------------------------------------------------------*/
129 /*--- ISelEnv                                           ---*/
130 /*---------------------------------------------------------*/
131 
132 /* This carries around:
133 
134    - A mapping from IRTemp to IRType, giving the type of any IRTemp we
135      might encounter.  This is computed before insn selection starts,
136      and does not change.
137 
138    - A mapping from IRTemp to HReg.  This tells the insn selector
139      which virtual register(s) are associated with each IRTemp
140      temporary.  This is computed before insn selection starts, and
141      does not change.  We expect this mapping to map precisely the
142      same set of IRTemps as the type mapping does.
143 
144         - vregmap   holds the primary register for the IRTemp.
145         - vregmapHI is only used for 64-bit integer-typed
146              IRTemps.  It holds the identity of a second
147              32-bit virtual HReg, which holds the high half
148              of the value.
149 
150    - The code array, that is, the insns selected so far.
151 
152    - A counter, for generating new virtual registers.
153 
154    - The host subarchitecture we are selecting insns for.
155      This is set at the start and does not change.
156 
157    - A Bool for indicating whether we may generate chain-me
158      instructions for control flow transfers, or whether we must use
159      XAssisted.
160 
161    - The maximum guest address of any guest insn in this block.
162      Actually, the address of the highest-addressed byte from any insn
163      in this block.  Is set at the start and does not change.  This is
164      used for detecting jumps which are definitely forward-edges from
165      this block, and therefore can be made (chained) to the fast entry
166      point of the destination, thereby avoiding the destination's
167      event check.
168 
169    Note, this is all (well, mostly) host-independent.
170 */
171 
172 typedef
173    struct {
174       /* Constant -- are set at the start and do not change. */
175       IRTypeEnv*   type_env;
176 
177       HReg*        vregmap;
178       HReg*        vregmapHI;
179       Int          n_vregmap;
180 
181       UInt         hwcaps;
182 
183       Bool         chainingAllowed;
184       Addr32       max_ga;
185 
186       /* These are modified as we go along. */
187       HInstrArray* code;
188       Int          vreg_ctr;
189    }
190    ISelEnv;
191 
192 
lookupIRTemp(ISelEnv * env,IRTemp tmp)193 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
194 {
195    vassert(tmp >= 0);
196    vassert(tmp < env->n_vregmap);
197    return env->vregmap[tmp];
198 }
199 
lookupIRTemp64(HReg * vrHI,HReg * vrLO,ISelEnv * env,IRTemp tmp)200 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
201 {
202    vassert(tmp >= 0);
203    vassert(tmp < env->n_vregmap);
204    vassert(! hregIsInvalid(env->vregmapHI[tmp]));
205    *vrLO = env->vregmap[tmp];
206    *vrHI = env->vregmapHI[tmp];
207 }
208 
addInstr(ISelEnv * env,X86Instr * instr)209 static void addInstr ( ISelEnv* env, X86Instr* instr )
210 {
211    addHInstr(env->code, instr);
212    if (vex_traceflags & VEX_TRACE_VCODE) {
213       ppX86Instr(instr, False);
214       vex_printf("\n");
215    }
216 }
217 
newVRegI(ISelEnv * env)218 static HReg newVRegI ( ISelEnv* env )
219 {
220    HReg reg = mkHReg(True/*virtual reg*/, HRcInt32, 0/*enc*/, env->vreg_ctr);
221    env->vreg_ctr++;
222    return reg;
223 }
224 
newVRegF(ISelEnv * env)225 static HReg newVRegF ( ISelEnv* env )
226 {
227    HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
228    env->vreg_ctr++;
229    return reg;
230 }
231 
newVRegV(ISelEnv * env)232 static HReg newVRegV ( ISelEnv* env )
233 {
234    HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
235    env->vreg_ctr++;
236    return reg;
237 }
238 
239 
240 /*---------------------------------------------------------*/
241 /*--- ISEL: Forward declarations                        ---*/
242 /*---------------------------------------------------------*/
243 
244 /* These are organised as iselXXX and iselXXX_wrk pairs.  The
245    iselXXX_wrk do the real work, but are not to be called directly.
246    For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
247    checks that all returned registers are virtual.  You should not
248    call the _wrk version directly.
249 */
250 static X86RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e );
251 static X86RMI*     iselIntExpr_RMI     ( ISelEnv* env, const IRExpr* e );
252 
253 static X86RI*      iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e );
254 static X86RI*      iselIntExpr_RI     ( ISelEnv* env, const IRExpr* e );
255 
256 static X86RM*      iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e );
257 static X86RM*      iselIntExpr_RM     ( ISelEnv* env, const IRExpr* e );
258 
259 static HReg        iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e );
260 static HReg        iselIntExpr_R     ( ISelEnv* env, const IRExpr* e );
261 
262 static X86AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e );
263 static X86AMode*   iselIntExpr_AMode     ( ISelEnv* env, const IRExpr* e );
264 
265 static void        iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
266                                        ISelEnv* env, const IRExpr* e );
267 static void        iselInt64Expr     ( HReg* rHi, HReg* rLo,
268                                        ISelEnv* env, const IRExpr* e );
269 
270 static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e );
271 static X86CondCode iselCondCode     ( ISelEnv* env, const IRExpr* e );
272 
273 static HReg        iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e );
274 static HReg        iselDblExpr     ( ISelEnv* env, const IRExpr* e );
275 
276 static HReg        iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e );
277 static HReg        iselFltExpr     ( ISelEnv* env, const IRExpr* e );
278 
279 static HReg        iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e );
280 static HReg        iselVecExpr     ( ISelEnv* env, const IRExpr* e );
281 
282 
283 /*---------------------------------------------------------*/
284 /*--- ISEL: Misc helpers                                ---*/
285 /*---------------------------------------------------------*/
286 
287 /* Make a int reg-reg move. */
288 
mk_iMOVsd_RR(HReg src,HReg dst)289 static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
290 {
291    vassert(hregClass(src) == HRcInt32);
292    vassert(hregClass(dst) == HRcInt32);
293    return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst);
294 }
295 
296 
297 /* Make a vector reg-reg move. */
298 
mk_vMOVsd_RR(HReg src,HReg dst)299 static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
300 {
301    vassert(hregClass(src) == HRcVec128);
302    vassert(hregClass(dst) == HRcVec128);
303    return X86Instr_SseReRg(Xsse_MOV, src, dst);
304 }
305 
306 /* Advance/retreat %esp by n. */
307 
add_to_esp(ISelEnv * env,Int n)308 static void add_to_esp ( ISelEnv* env, Int n )
309 {
310    vassert(n > 0 && n < 256 && (n%4) == 0);
311    addInstr(env,
312             X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP()));
313 }
314 
sub_from_esp(ISelEnv * env,Int n)315 static void sub_from_esp ( ISelEnv* env, Int n )
316 {
317    vassert(n > 0 && n < 256 && (n%4) == 0);
318    addInstr(env,
319             X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP()));
320 }
321 
322 
323 /* Given an amode, return one which references 4 bytes further
324    along. */
325 
advance4(X86AMode * am)326 static X86AMode* advance4 ( X86AMode* am )
327 {
328    X86AMode* am4 = dopyX86AMode(am);
329    switch (am4->tag) {
330       case Xam_IRRS:
331          am4->Xam.IRRS.imm += 4; break;
332       case Xam_IR:
333          am4->Xam.IR.imm += 4; break;
334       default:
335          vpanic("advance4(x86,host)");
336    }
337    return am4;
338 }
339 
340 
341 /* Push an arg onto the host stack, in preparation for a call to a
342    helper function of some kind.  Returns the number of 32-bit words
343    pushed.  If we encounter an IRExpr_VECRET() then we expect that
344    r_vecRetAddr will be a valid register, that holds the relevant
345    address.
346 */
pushArg(ISelEnv * env,IRExpr * arg,HReg r_vecRetAddr)347 static Int pushArg ( ISelEnv* env, IRExpr* arg, HReg r_vecRetAddr )
348 {
349    if (UNLIKELY(arg->tag == Iex_VECRET)) {
350       vassert(0); //ATC
351       vassert(!hregIsInvalid(r_vecRetAddr));
352       addInstr(env, X86Instr_Push(X86RMI_Reg(r_vecRetAddr)));
353       return 1;
354    }
355    if (UNLIKELY(arg->tag == Iex_GSPTR)) {
356       addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
357       return 1;
358    }
359    /* Else it's a "normal" expression. */
360    IRType arg_ty = typeOfIRExpr(env->type_env, arg);
361    if (arg_ty == Ity_I32) {
362       addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
363       return 1;
364    } else
365    if (arg_ty == Ity_I64) {
366       HReg rHi, rLo;
367       iselInt64Expr(&rHi, &rLo, env, arg);
368       addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
369       addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
370       return 2;
371    }
372    ppIRExpr(arg);
373    vpanic("pushArg(x86): can't handle arg of this type");
374 }
375 
376 
377 /* Complete the call to a helper function, by calling the
378    helper and clearing the args off the stack. */
379 
380 static
callHelperAndClearArgs(ISelEnv * env,X86CondCode cc,IRCallee * cee,Int n_arg_ws,RetLoc rloc)381 void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc,
382                               IRCallee* cee, Int n_arg_ws,
383                               RetLoc rloc )
384 {
385    /* Complication.  Need to decide which reg to use as the fn address
386       pointer, in a way that doesn't trash regparm-passed
387       parameters. */
388    vassert(sizeof(void*) == 4);
389 
390    addInstr(env, X86Instr_Call( cc, (Addr)cee->addr,
391                                 cee->regparms, rloc));
392    if (n_arg_ws > 0)
393       add_to_esp(env, 4*n_arg_ws);
394 }
395 
396 
397 /* Used only in doHelperCall.  See big comment in doHelperCall re
398    handling of regparm args.  This function figures out whether
399    evaluation of an expression might require use of a fixed register.
400    If in doubt return True (safe but suboptimal).
401 */
402 static
mightRequireFixedRegs(IRExpr * e)403 Bool mightRequireFixedRegs ( IRExpr* e )
404 {
405    if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
406       // These are always "safe" -- either a copy of %esp in some
407       // arbitrary vreg, or a copy of %ebp, respectively.
408       return False;
409    }
410    /* Else it's a "normal" expression. */
411    switch (e->tag) {
412       case Iex_RdTmp: case Iex_Const: case Iex_Get:
413          return False;
414       default:
415          return True;
416    }
417 }
418 
419 
420 /* Do a complete function call.  |guard| is a Ity_Bit expression
421    indicating whether or not the call happens.  If guard==NULL, the
422    call is unconditional.  |retloc| is set to indicate where the
423    return value is after the call.  The caller (of this fn) must
424    generate code to add |stackAdjustAfterCall| to the stack pointer
425    after the call is done. */
426 
427 static
doHelperCall(UInt * stackAdjustAfterCall,RetLoc * retloc,ISelEnv * env,IRExpr * guard,IRCallee * cee,IRType retTy,IRExpr ** args)428 void doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
429                     /*OUT*/RetLoc* retloc,
430                     ISelEnv* env,
431                     IRExpr* guard,
432                     IRCallee* cee, IRType retTy, IRExpr** args )
433 {
434    X86CondCode cc;
435    HReg        argregs[3];
436    HReg        tmpregs[3];
437    Bool        danger;
438    Int         not_done_yet, n_args, n_arg_ws, stack_limit,
439                i, argreg, argregX;
440 
441    /* Set default returns.  We'll update them later if needed. */
442    *stackAdjustAfterCall = 0;
443    *retloc               = mk_RetLoc_INVALID();
444 
445    /* These are used for cross-checking that IR-level constraints on
446       the use of Iex_VECRET and Iex_GSPTR are observed. */
447    UInt nVECRETs = 0;
448    UInt nGSPTRs  = 0;
449 
450    /* Marshal args for a call, do the call, and clear the stack.
451       Complexities to consider:
452 
453       * The return type can be I{64,32,16,8} or V128.  In the V128
454         case, it is expected that |args| will contain the special
455         node IRExpr_VECRET(), in which case this routine generates
456         code to allocate space on the stack for the vector return
457         value.  Since we are not passing any scalars on the stack, it
458         is enough to preallocate the return space before marshalling
459         any arguments, in this case.
460 
461         |args| may also contain IRExpr_GSPTR(), in which case the
462         value in %ebp is passed as the corresponding argument.
463 
464       * If the callee claims regparmness of 1, 2 or 3, we must pass the
465         first 1, 2 or 3 args in registers (EAX, EDX, and ECX
466         respectively).  To keep things relatively simple, only args of
467         type I32 may be passed as regparms -- just bomb out if anything
468         else turns up.  Clearly this depends on the front ends not
469         trying to pass any other types as regparms.
470    */
471 
472    /* 16 Nov 2004: the regparm handling is complicated by the
473       following problem.
474 
475       Consider a call two a function with two regparm parameters:
476       f(e1,e2).  We need to compute e1 into %eax and e2 into %edx.
477       Suppose code is first generated to compute e1 into %eax.  Then,
478       code is generated to compute e2 into %edx.  Unfortunately, if
479       the latter code sequence uses %eax, it will trash the value of
480       e1 computed by the former sequence.  This could happen if (for
481       example) e2 itself involved a function call.  In the code below,
482       args are evaluated right-to-left, not left-to-right, but the
483       principle and the problem are the same.
484 
485       One solution is to compute all regparm-bound args into vregs
486       first, and once they are all done, move them to the relevant
487       real regs.  This always gives correct code, but it also gives
488       a bunch of vreg-to-rreg moves which are usually redundant but
489       are hard for the register allocator to get rid of.
490 
491       A compromise is to first examine all regparm'd argument
492       expressions.  If they are all so simple that it is clear
493       they will be evaluated without use of any fixed registers,
494       use the old compute-directly-to-fixed-target scheme.  If not,
495       be safe and use the via-vregs scheme.
496 
497       Note this requires being able to examine an expression and
498       determine whether or not evaluation of it might use a fixed
499       register.  That requires knowledge of how the rest of this
500       insn selector works.  Currently just the following 3 are
501       regarded as safe -- hopefully they cover the majority of
502       arguments in practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
503    */
504    vassert(cee->regparms >= 0 && cee->regparms <= 3);
505 
506    /* Count the number of args and also the VECRETs */
507    n_args = n_arg_ws = 0;
508    while (args[n_args]) {
509       IRExpr* arg = args[n_args];
510       n_args++;
511       if (UNLIKELY(arg->tag == Iex_VECRET)) {
512          nVECRETs++;
513       } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
514          nGSPTRs++;
515       }
516    }
517 
518    /* If this fails, the IR is ill-formed */
519    vassert(nGSPTRs == 0 || nGSPTRs == 1);
520 
521    /* If we have a VECRET, allocate space on the stack for the return
522       value, and record the stack pointer after that. */
523    HReg r_vecRetAddr = INVALID_HREG;
524    if (nVECRETs == 1) {
525       vassert(retTy == Ity_V128 || retTy == Ity_V256);
526       vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
527       r_vecRetAddr = newVRegI(env);
528       sub_from_esp(env, 16);
529       addInstr(env, mk_iMOVsd_RR( hregX86_ESP(), r_vecRetAddr ));
530    } else {
531       // If either of these fail, the IR is ill-formed
532       vassert(retTy != Ity_V128 && retTy != Ity_V256);
533       vassert(nVECRETs == 0);
534    }
535 
536    not_done_yet = n_args;
537 
538    stack_limit = cee->regparms;
539 
540    /* ------ BEGIN marshall all arguments ------ */
541 
542    /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
543    for (i = n_args-1; i >= stack_limit; i--) {
544       n_arg_ws += pushArg(env, args[i], r_vecRetAddr);
545       not_done_yet--;
546    }
547 
548    /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
549       registers. */
550 
551    if (cee->regparms > 0) {
552 
553       /* ------ BEGIN deal with regparms ------ */
554 
555       /* deal with regparms, not forgetting %ebp if needed. */
556       argregs[0] = hregX86_EAX();
557       argregs[1] = hregX86_EDX();
558       argregs[2] = hregX86_ECX();
559       tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;
560 
561       argreg = cee->regparms;
562 
563       /* In keeping with big comment above, detect potential danger
564          and use the via-vregs scheme if needed. */
565       danger = False;
566       for (i = stack_limit-1; i >= 0; i--) {
567          if (mightRequireFixedRegs(args[i])) {
568             danger = True;
569             break;
570          }
571       }
572 
573       if (danger) {
574 
575          /* Move via temporaries */
576          argregX = argreg;
577          for (i = stack_limit-1; i >= 0; i--) {
578 
579             if (0) {
580                vex_printf("x86 host: register param is complex: ");
581                ppIRExpr(args[i]);
582                vex_printf("\n");
583             }
584 
585             IRExpr* arg = args[i];
586             argreg--;
587             vassert(argreg >= 0);
588             if (UNLIKELY(arg->tag == Iex_VECRET)) {
589                vassert(0); //ATC
590             }
591             else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
592                vassert(0); //ATC
593             } else {
594                vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
595                tmpregs[argreg] = iselIntExpr_R(env, arg);
596             }
597             not_done_yet--;
598          }
599          for (i = stack_limit-1; i >= 0; i--) {
600             argregX--;
601             vassert(argregX >= 0);
602             addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
603          }
604 
605       } else {
606          /* It's safe to compute all regparm args directly into their
607             target registers. */
608          for (i = stack_limit-1; i >= 0; i--) {
609             IRExpr* arg = args[i];
610             argreg--;
611             vassert(argreg >= 0);
612             if (UNLIKELY(arg->tag == Iex_VECRET)) {
613                vassert(!hregIsInvalid(r_vecRetAddr));
614                addInstr(env, X86Instr_Alu32R(Xalu_MOV,
615                                              X86RMI_Reg(r_vecRetAddr),
616                                              argregs[argreg]));
617             }
618             else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
619                vassert(0); //ATC
620             } else {
621                vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
622                addInstr(env, X86Instr_Alu32R(Xalu_MOV,
623                                              iselIntExpr_RMI(env, arg),
624                                              argregs[argreg]));
625             }
626             not_done_yet--;
627          }
628 
629       }
630 
631       /* ------ END deal with regparms ------ */
632 
633    }
634 
635    vassert(not_done_yet == 0);
636 
637    /* ------ END marshall all arguments ------ */
638 
639    /* Now we can compute the condition.  We can't do it earlier
640       because the argument computations could trash the condition
641       codes.  Be a bit clever to handle the common case where the
642       guard is 1:Bit. */
643    cc = Xcc_ALWAYS;
644    if (guard) {
645       if (guard->tag == Iex_Const
646           && guard->Iex.Const.con->tag == Ico_U1
647           && guard->Iex.Const.con->Ico.U1 == True) {
648          /* unconditional -- do nothing */
649       } else {
650          cc = iselCondCode( env, guard );
651       }
652    }
653 
654    /* Do final checks, set the return values, and generate the call
655       instruction proper. */
656    vassert(*stackAdjustAfterCall == 0);
657    vassert(is_RetLoc_INVALID(*retloc));
658    switch (retTy) {
659          case Ity_INVALID:
660             /* Function doesn't return a value. */
661             *retloc = mk_RetLoc_simple(RLPri_None);
662             break;
663          case Ity_I64:
664             *retloc = mk_RetLoc_simple(RLPri_2Int);
665             break;
666          case Ity_I32: case Ity_I16: case Ity_I8:
667             *retloc = mk_RetLoc_simple(RLPri_Int);
668             break;
669          case Ity_V128:
670             *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
671             *stackAdjustAfterCall = 16;
672             break;
673          case Ity_V256:
674             vassert(0); // ATC
675             *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
676             *stackAdjustAfterCall = 32;
677             break;
678          default:
679             /* IR can denote other possible return types, but we don't
680                handle those here. */
681            vassert(0);
682    }
683 
684    /* Finally, generate the call itself.  This needs the *retloc value
685       set in the switch above, which is why it's at the end. */
686    callHelperAndClearArgs( env, cc, cee, n_arg_ws, *retloc );
687 }
688 
689 
690 /* Given a guest-state array descriptor, an index expression and a
691    bias, generate an X86AMode holding the relevant guest state
692    offset. */
693 
694 static
genGuestArrayOffset(ISelEnv * env,IRRegArray * descr,IRExpr * off,Int bias)695 X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
696                                 IRExpr* off, Int bias )
697 {
698    HReg tmp, roff;
699    Int  elemSz = sizeofIRType(descr->elemTy);
700    Int  nElems = descr->nElems;
701    Int  shift  = 0;
702 
703    /* throw out any cases not generated by an x86 front end.  In
704       theory there might be a day where we need to handle them -- if
705       we ever run non-x86-guest on x86 host. */
706 
707    if (nElems != 8)
708       vpanic("genGuestArrayOffset(x86 host)(1)");
709 
710    switch (elemSz) {
711       case 1:  shift = 0; break;
712       case 4:  shift = 2; break;
713       case 8:  shift = 3; break;
714       default: vpanic("genGuestArrayOffset(x86 host)(2)");
715    }
716 
717    /* Compute off into a reg, %off.  Then return:
718 
719          movl %off, %tmp
720          addl $bias, %tmp  (if bias != 0)
721          andl %tmp, 7
722          ... base(%ebp, %tmp, shift) ...
723    */
724    tmp  = newVRegI(env);
725    roff = iselIntExpr_R(env, off);
726    addInstr(env, mk_iMOVsd_RR(roff, tmp));
727    if (bias != 0) {
728       addInstr(env,
729                X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp));
730    }
731    addInstr(env,
732             X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp));
733    return
734       X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift );
735 }
736 
737 
738 /* Mess with the FPU's rounding mode: set to the default rounding mode
739    (DEFAULT_FPUCW). */
740 static
set_FPU_rounding_default(ISelEnv * env)741 void set_FPU_rounding_default ( ISelEnv* env )
742 {
743    /* pushl $DEFAULT_FPUCW
744       fldcw 0(%esp)
745       addl $4, %esp
746    */
747    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
748    addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
749    addInstr(env, X86Instr_FpLdCW(zero_esp));
750    add_to_esp(env, 4);
751 }
752 
753 
754 /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
755    expression denoting a value in the range 0 .. 3, indicating a round
756    mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
757    the same rounding.
758 */
759 static
set_FPU_rounding_mode(ISelEnv * env,IRExpr * mode)760 void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
761 {
762    HReg rrm  = iselIntExpr_R(env, mode);
763    HReg rrm2 = newVRegI(env);
764    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
765 
766    /* movl  %rrm, %rrm2
767       andl  $3, %rrm2   -- shouldn't be needed; paranoia
768       shll  $10, %rrm2
769       orl   $DEFAULT_FPUCW, %rrm2
770       pushl %rrm2
771       fldcw 0(%esp)
772       addl  $4, %esp
773    */
774    addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
775    addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
776    addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2));
777    addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
778    addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
779    addInstr(env, X86Instr_FpLdCW(zero_esp));
780    add_to_esp(env, 4);
781 }
782 
783 
784 /* Generate !src into a new vector register, and be sure that the code
785    is SSE1 compatible.  Amazing that Intel doesn't offer a less crappy
786    way to do this.
787 */
do_sse_Not128(ISelEnv * env,HReg src)788 static HReg do_sse_Not128 ( ISelEnv* env, HReg src )
789 {
790    HReg dst = newVRegV(env);
791    /* Set dst to zero.  If dst contains a NaN then all hell might
792       break loose after the comparison.  So, first zero it. */
793    addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst));
794    /* And now make it all 1s ... */
795    addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst));
796    /* Finally, xor 'src' into it. */
797    addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst));
798    /* Doesn't that just totally suck? */
799    return dst;
800 }
801 
802 
803 /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
804    after most non-simple FPU operations (simple = +, -, *, / and
805    sqrt).
806 
807    This could be done a lot more efficiently if needed, by loading
808    zero and adding it to the value to be rounded (fldz ; faddp?).
809 */
roundToF64(ISelEnv * env,HReg reg)810 static void roundToF64 ( ISelEnv* env, HReg reg )
811 {
812    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
813    sub_from_esp(env, 8);
814    addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
815    addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
816    add_to_esp(env, 8);
817 }
818 
819 
820 /*---------------------------------------------------------*/
821 /*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
822 /*---------------------------------------------------------*/
823 
824 /* Select insns for an integer-typed expression, and add them to the
825    code list.  Return a reg holding the result.  This reg will be a
826    virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
827    want to modify it, ask for a new vreg, copy it in there, and modify
828    the copy.  The register allocator will do its best to map both
829    vregs to the same real register, so the copies will often disappear
830    later in the game.
831 
832    This should handle expressions of 32, 16 and 8-bit type.  All
833    results are returned in a 32-bit register.  For 16- and 8-bit
834    expressions, the upper 16/24 bits are arbitrary, so you should mask
835    or sign extend partial values if necessary.
836 */
837 
iselIntExpr_R(ISelEnv * env,const IRExpr * e)838 static HReg iselIntExpr_R ( ISelEnv* env, const IRExpr* e )
839 {
840    HReg r = iselIntExpr_R_wrk(env, e);
841    /* sanity checks ... */
842 #  if 0
843    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
844 #  endif
845    vassert(hregClass(r) == HRcInt32);
846    vassert(hregIsVirtual(r));
847    return r;
848 }
849 
850 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_R_wrk(ISelEnv * env,const IRExpr * e)851 static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
852 {
853    MatchInfo mi;
854 
855    IRType ty = typeOfIRExpr(env->type_env,e);
856    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
857 
858    switch (e->tag) {
859 
860    /* --------- TEMP --------- */
861    case Iex_RdTmp: {
862       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
863    }
864 
865    /* --------- LOAD --------- */
866    case Iex_Load: {
867       HReg dst = newVRegI(env);
868       X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
869 
870       /* We can't handle big-endian loads, nor load-linked. */
871       if (e->Iex.Load.end != Iend_LE)
872          goto irreducible;
873 
874       if (ty == Ity_I32) {
875          addInstr(env, X86Instr_Alu32R(Xalu_MOV,
876                                        X86RMI_Mem(amode), dst) );
877          return dst;
878       }
879       if (ty == Ity_I16) {
880          addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
881          return dst;
882       }
883       if (ty == Ity_I8) {
884          addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
885          return dst;
886       }
887       break;
888    }
889 
890    /* --------- TERNARY OP --------- */
891    case Iex_Triop: {
892       IRTriop *triop = e->Iex.Triop.details;
893       /* C3210 flags following FPU partial remainder (fprem), both
894          IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
895       if (triop->op == Iop_PRemC3210F64
896           || triop->op == Iop_PRem1C3210F64) {
897          HReg junk = newVRegF(env);
898          HReg dst  = newVRegI(env);
899          HReg srcL = iselDblExpr(env, triop->arg2);
900          HReg srcR = iselDblExpr(env, triop->arg3);
901          /* XXXROUNDINGFIXME */
902          /* set roundingmode here */
903          addInstr(env, X86Instr_FpBinary(
904                            e->Iex.Binop.op==Iop_PRemC3210F64
905                               ? Xfp_PREM : Xfp_PREM1,
906                            srcL,srcR,junk
907                  ));
908          /* The previous pseudo-insn will have left the FPU's C3210
909             flags set correctly.  So bag them. */
910          addInstr(env, X86Instr_FpStSW_AX());
911          addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
912          addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
913          return dst;
914       }
915 
916       break;
917    }
918 
919    /* --------- BINARY OP --------- */
920    case Iex_Binop: {
921       X86AluOp   aluOp;
922       X86ShiftOp shOp;
923 
924       /* Pattern: Sub32(0,x) */
925       if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
926          HReg dst = newVRegI(env);
927          HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
928          addInstr(env, mk_iMOVsd_RR(reg,dst));
929          addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
930          return dst;
931       }
932 
933       /* Is it an addition or logical style op? */
934       switch (e->Iex.Binop.op) {
935          case Iop_Add8: case Iop_Add16: case Iop_Add32:
936             aluOp = Xalu_ADD; break;
937          case Iop_Sub8: case Iop_Sub16: case Iop_Sub32:
938             aluOp = Xalu_SUB; break;
939          case Iop_And8: case Iop_And16: case Iop_And32:
940             aluOp = Xalu_AND; break;
941          case Iop_Or8: case Iop_Or16: case Iop_Or32:
942             aluOp = Xalu_OR; break;
943          case Iop_Xor8: case Iop_Xor16: case Iop_Xor32:
944             aluOp = Xalu_XOR; break;
945          case Iop_Mul16: case Iop_Mul32:
946             aluOp = Xalu_MUL; break;
947          default:
948             aluOp = Xalu_INVALID; break;
949       }
950       /* For commutative ops we assume any literal
951          values are on the second operand. */
952       if (aluOp != Xalu_INVALID) {
953          HReg dst    = newVRegI(env);
954          HReg reg    = iselIntExpr_R(env, e->Iex.Binop.arg1);
955          X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
956          addInstr(env, mk_iMOVsd_RR(reg,dst));
957          addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst));
958          return dst;
959       }
960       /* Could do better here; forcing the first arg into a reg
961          isn't always clever.
962          -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)),
963                         LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32(
964                         t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32)))
965             movl 0xFFFFFFA0(%vr41),%vr107
966             movl 0xFFFFFFA4(%vr41),%vr108
967             movl %vr107,%vr106
968             xorl %vr108,%vr106
969             movl 0xFFFFFFA8(%vr41),%vr109
970             movl %vr106,%vr105
971             andl %vr109,%vr105
972             movl 0xFFFFFFA0(%vr41),%vr110
973             movl %vr105,%vr104
974             xorl %vr110,%vr104
975             movl %vr104,%vr70
976       */
977 
978       /* Perhaps a shift op? */
979       switch (e->Iex.Binop.op) {
980          case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
981             shOp = Xsh_SHL; break;
982          case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
983             shOp = Xsh_SHR; break;
984          case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
985             shOp = Xsh_SAR; break;
986          default:
987             shOp = Xsh_INVALID; break;
988       }
989       if (shOp != Xsh_INVALID) {
990          HReg dst = newVRegI(env);
991 
992          /* regL = the value to be shifted */
993          HReg regL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
994          addInstr(env, mk_iMOVsd_RR(regL,dst));
995 
996          /* Do any necessary widening for 16/8 bit operands */
997          switch (e->Iex.Binop.op) {
998             case Iop_Shr8:
999                addInstr(env, X86Instr_Alu32R(
1000                                 Xalu_AND, X86RMI_Imm(0xFF), dst));
1001                break;
1002             case Iop_Shr16:
1003                addInstr(env, X86Instr_Alu32R(
1004                                 Xalu_AND, X86RMI_Imm(0xFFFF), dst));
1005                break;
1006             case Iop_Sar8:
1007                addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst));
1008                addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst));
1009                break;
1010             case Iop_Sar16:
1011                addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst));
1012                addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst));
1013                break;
1014             default: break;
1015          }
1016 
1017          /* Now consider the shift amount.  If it's a literal, we
1018             can do a much better job than the general case. */
1019          if (e->Iex.Binop.arg2->tag == Iex_Const) {
1020             /* assert that the IR is well-typed */
1021             Int nshift;
1022             vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1023             nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1024 	    vassert(nshift >= 0);
1025 	    if (nshift > 0)
1026                /* Can't allow nshift==0 since that means %cl */
1027                addInstr(env, X86Instr_Sh32( shOp, nshift, dst ));
1028          } else {
1029             /* General case; we have to force the amount into %cl. */
1030             HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1031             addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX()));
1032             addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst));
1033          }
1034          return dst;
1035       }
1036 
1037       /* Handle misc other ops. */
1038 
1039       if (e->Iex.Binop.op == Iop_Max32U) {
1040          HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1041          HReg dst  = newVRegI(env);
1042          HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
1043          addInstr(env, mk_iMOVsd_RR(src1,dst));
1044          addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
1045          addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
1046          return dst;
1047       }
1048 
1049       if (e->Iex.Binop.op == Iop_8HLto16) {
1050          HReg hi8  = newVRegI(env);
1051          HReg lo8  = newVRegI(env);
1052          HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1053          HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1054          addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
1055          addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
1056          addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8));
1057          addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
1058          addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
1059          return hi8;
1060       }
1061 
1062       if (e->Iex.Binop.op == Iop_16HLto32) {
1063          HReg hi16  = newVRegI(env);
1064          HReg lo16  = newVRegI(env);
1065          HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1066          HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1067          addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
1068          addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
1069          addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16));
1070          addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
1071          addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
1072          return hi16;
1073       }
1074 
1075       if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8
1076           || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) {
1077          HReg a16   = newVRegI(env);
1078          HReg b16   = newVRegI(env);
1079          HReg a16s  = iselIntExpr_R(env, e->Iex.Binop.arg1);
1080          HReg b16s  = iselIntExpr_R(env, e->Iex.Binop.arg2);
1081          Int  shift = (e->Iex.Binop.op == Iop_MullS8
1082                        || e->Iex.Binop.op == Iop_MullU8)
1083                          ? 24 : 16;
1084          X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8
1085                               || e->Iex.Binop.op == Iop_MullS16)
1086                                 ? Xsh_SAR : Xsh_SHR;
1087 
1088          addInstr(env, mk_iMOVsd_RR(a16s, a16));
1089          addInstr(env, mk_iMOVsd_RR(b16s, b16));
1090          addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16));
1091          addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16));
1092          addInstr(env, X86Instr_Sh32(shr_op,  shift, a16));
1093          addInstr(env, X86Instr_Sh32(shr_op,  shift, b16));
1094          addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16));
1095          return b16;
1096       }
1097 
1098       if (e->Iex.Binop.op == Iop_CmpF64) {
1099          HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
1100          HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
1101          HReg dst = newVRegI(env);
1102          addInstr(env, X86Instr_FpCmp(fL,fR,dst));
1103          /* shift this right 8 bits so as to conform to CmpF64
1104             definition. */
1105          addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst));
1106          return dst;
1107       }
1108 
1109       if (e->Iex.Binop.op == Iop_F64toI32S
1110           || e->Iex.Binop.op == Iop_F64toI16S) {
1111          Int  sz  = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4;
1112          HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
1113          HReg dst = newVRegI(env);
1114 
1115          /* Used several times ... */
1116          X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1117 
1118 	 /* rf now holds the value to be converted, and rrm holds the
1119 	    rounding mode value, encoded as per the IRRoundingMode
1120 	    enum.  The first thing to do is set the FPU's rounding
1121 	    mode accordingly. */
1122 
1123          /* Create a space for the format conversion. */
1124          /* subl $4, %esp */
1125          sub_from_esp(env, 4);
1126 
1127 	 /* Set host rounding mode */
1128 	 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
1129 
1130          /* gistw/l %rf, 0(%esp) */
1131          addInstr(env, X86Instr_FpLdStI(False/*store*/,
1132                                         toUChar(sz), rf, zero_esp));
1133 
1134          if (sz == 2) {
1135             /* movzwl 0(%esp), %dst */
1136             addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
1137          } else {
1138             /* movl 0(%esp), %dst */
1139             vassert(sz == 4);
1140             addInstr(env, X86Instr_Alu32R(
1141                              Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1142          }
1143 
1144 	 /* Restore default FPU rounding. */
1145          set_FPU_rounding_default( env );
1146 
1147          /* addl $4, %esp */
1148 	 add_to_esp(env, 4);
1149          return dst;
1150       }
1151 
1152       break;
1153    }
1154 
1155    /* --------- UNARY OP --------- */
1156    case Iex_Unop: {
1157 
1158       /* 1Uto8(32to1(expr32)) */
1159       if (e->Iex.Unop.op == Iop_1Uto8) {
1160          DECLARE_PATTERN(p_32to1_then_1Uto8);
1161          DEFINE_PATTERN(p_32to1_then_1Uto8,
1162                         unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1163          if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1164             const IRExpr* expr32 = mi.bindee[0];
1165             HReg dst = newVRegI(env);
1166             HReg src = iselIntExpr_R(env, expr32);
1167             addInstr(env, mk_iMOVsd_RR(src,dst) );
1168             addInstr(env, X86Instr_Alu32R(Xalu_AND,
1169                                           X86RMI_Imm(1), dst));
1170             return dst;
1171          }
1172       }
1173 
1174       /* 8Uto32(LDle(expr32)) */
1175       if (e->Iex.Unop.op == Iop_8Uto32) {
1176          DECLARE_PATTERN(p_LDle8_then_8Uto32);
1177          DEFINE_PATTERN(p_LDle8_then_8Uto32,
1178                         unop(Iop_8Uto32,
1179                              IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1180          if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1181             HReg dst = newVRegI(env);
1182             X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1183             addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1184             return dst;
1185          }
1186       }
1187 
1188       /* 8Sto32(LDle(expr32)) */
1189       if (e->Iex.Unop.op == Iop_8Sto32) {
1190          DECLARE_PATTERN(p_LDle8_then_8Sto32);
1191          DEFINE_PATTERN(p_LDle8_then_8Sto32,
1192                         unop(Iop_8Sto32,
1193                              IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1194          if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1195             HReg dst = newVRegI(env);
1196             X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1197             addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1198             return dst;
1199          }
1200       }
1201 
1202       /* 16Uto32(LDle(expr32)) */
1203       if (e->Iex.Unop.op == Iop_16Uto32) {
1204          DECLARE_PATTERN(p_LDle16_then_16Uto32);
1205          DEFINE_PATTERN(p_LDle16_then_16Uto32,
1206                         unop(Iop_16Uto32,
1207                              IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1208          if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1209             HReg dst = newVRegI(env);
1210             X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1211             addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1212             return dst;
1213          }
1214       }
1215 
1216       /* 8Uto32(GET:I8) */
1217       if (e->Iex.Unop.op == Iop_8Uto32) {
1218          if (e->Iex.Unop.arg->tag == Iex_Get) {
1219             HReg      dst;
1220             X86AMode* amode;
1221             vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1222             dst = newVRegI(env);
1223             amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1224                                 hregX86_EBP());
1225             addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1226             return dst;
1227          }
1228       }
1229 
1230       /* 16to32(GET:I16) */
1231       if (e->Iex.Unop.op == Iop_16Uto32) {
1232          if (e->Iex.Unop.arg->tag == Iex_Get) {
1233             HReg      dst;
1234             X86AMode* amode;
1235             vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1236             dst = newVRegI(env);
1237             amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1238                                 hregX86_EBP());
1239             addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1240             return dst;
1241          }
1242       }
1243 
1244       switch (e->Iex.Unop.op) {
1245          case Iop_8Uto16:
1246          case Iop_8Uto32:
1247          case Iop_16Uto32: {
1248             HReg dst = newVRegI(env);
1249             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1250             UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1251             addInstr(env, mk_iMOVsd_RR(src,dst) );
1252             addInstr(env, X86Instr_Alu32R(Xalu_AND,
1253                                           X86RMI_Imm(mask), dst));
1254             return dst;
1255          }
1256          case Iop_8Sto16:
1257          case Iop_8Sto32:
1258          case Iop_16Sto32: {
1259             HReg dst = newVRegI(env);
1260             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1261             UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24;
1262             addInstr(env, mk_iMOVsd_RR(src,dst) );
1263             addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst));
1264             addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst));
1265             return dst;
1266          }
1267 	 case Iop_Not8:
1268 	 case Iop_Not16:
1269          case Iop_Not32: {
1270             HReg dst = newVRegI(env);
1271             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1272             addInstr(env, mk_iMOVsd_RR(src,dst) );
1273             addInstr(env, X86Instr_Unary32(Xun_NOT,dst));
1274             return dst;
1275          }
1276          case Iop_64HIto32: {
1277             HReg rHi, rLo;
1278             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1279             return rHi; /* and abandon rLo .. poor wee thing :-) */
1280          }
1281          case Iop_64to32: {
1282             HReg rHi, rLo;
1283             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1284             return rLo; /* similar stupid comment to the above ... */
1285          }
1286          case Iop_16HIto8:
1287          case Iop_32HIto16: {
1288             HReg dst  = newVRegI(env);
1289             HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
1290             Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1291             addInstr(env, mk_iMOVsd_RR(src,dst) );
1292             addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1293             return dst;
1294          }
1295          case Iop_1Uto32:
1296          case Iop_1Uto8: {
1297             HReg dst         = newVRegI(env);
1298             X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1299             addInstr(env, X86Instr_Set32(cond,dst));
1300             return dst;
1301          }
1302          case Iop_1Sto8:
1303          case Iop_1Sto16:
1304          case Iop_1Sto32: {
1305             /* could do better than this, but for now ... */
1306             HReg dst         = newVRegI(env);
1307             X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1308             addInstr(env, X86Instr_Set32(cond,dst));
1309             addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1310             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1311             return dst;
1312          }
1313          case Iop_Ctz32: {
1314             /* Count trailing zeroes, implemented by x86 'bsfl' */
1315             HReg dst = newVRegI(env);
1316             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1317             addInstr(env, X86Instr_Bsfr32(True,src,dst));
1318             return dst;
1319          }
1320          case Iop_Clz32: {
1321             /* Count leading zeroes.  Do 'bsrl' to establish the index
1322                of the highest set bit, and subtract that value from
1323                31. */
1324             HReg tmp = newVRegI(env);
1325             HReg dst = newVRegI(env);
1326             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1327             addInstr(env, X86Instr_Bsfr32(False,src,tmp));
1328             addInstr(env, X86Instr_Alu32R(Xalu_MOV,
1329                                           X86RMI_Imm(31), dst));
1330             addInstr(env, X86Instr_Alu32R(Xalu_SUB,
1331                                           X86RMI_Reg(tmp), dst));
1332             return dst;
1333          }
1334 
1335          case Iop_CmpwNEZ32: {
1336             HReg dst = newVRegI(env);
1337             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1338             addInstr(env, mk_iMOVsd_RR(src,dst));
1339             addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
1340             addInstr(env, X86Instr_Alu32R(Xalu_OR,
1341                                           X86RMI_Reg(src), dst));
1342             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1343             return dst;
1344          }
1345          case Iop_Left8:
1346          case Iop_Left16:
1347          case Iop_Left32: {
1348             HReg dst = newVRegI(env);
1349             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1350             addInstr(env, mk_iMOVsd_RR(src, dst));
1351             addInstr(env, X86Instr_Unary32(Xun_NEG, dst));
1352             addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst));
1353             return dst;
1354          }
1355 
1356          case Iop_V128to32: {
1357             HReg      dst  = newVRegI(env);
1358             HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
1359             X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1360             sub_from_esp(env, 16);
1361             addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1362             addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1363             add_to_esp(env, 16);
1364             return dst;
1365          }
1366 
1367          /* ReinterpF32asI32(e) */
1368          /* Given an IEEE754 single, produce an I32 with the same bit
1369             pattern.  Keep stack 8-aligned even though only using 4
1370             bytes. */
1371          case Iop_ReinterpF32asI32: {
1372             HReg rf   = iselFltExpr(env, e->Iex.Unop.arg);
1373             HReg dst  = newVRegI(env);
1374             X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1375             /* paranoia */
1376             set_FPU_rounding_default(env);
1377             /* subl $8, %esp */
1378             sub_from_esp(env, 8);
1379             /* gstF %rf, 0(%esp) */
1380             addInstr(env,
1381                      X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp));
1382             /* movl 0(%esp), %dst */
1383             addInstr(env,
1384                      X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1385             /* addl $8, %esp */
1386             add_to_esp(env, 8);
1387             return dst;
1388          }
1389 
1390          case Iop_16to8:
1391          case Iop_32to8:
1392          case Iop_32to16:
1393             /* These are no-ops. */
1394             return iselIntExpr_R(env, e->Iex.Unop.arg);
1395 
1396          case Iop_GetMSBs8x8: {
1397             /* Note: the following assumes the helper is of
1398                signature
1399                   UInt fn ( ULong ), and is not a regparm fn.
1400             */
1401             HReg  xLo, xHi;
1402             HReg  dst = newVRegI(env);
1403             Addr fn = (Addr)h_generic_calc_GetMSBs8x8;
1404             iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
1405             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
1406             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
1407             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
1408                                          0, mk_RetLoc_simple(RLPri_Int) ));
1409             add_to_esp(env, 2*4);
1410             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1411             return dst;
1412          }
1413 
1414          default:
1415             break;
1416       }
1417       break;
1418    }
1419 
1420    /* --------- GET --------- */
1421    case Iex_Get: {
1422       if (ty == Ity_I32) {
1423          HReg dst = newVRegI(env);
1424          addInstr(env, X86Instr_Alu32R(
1425                           Xalu_MOV,
1426                           X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1427                                                  hregX86_EBP())),
1428                           dst));
1429          return dst;
1430       }
1431       if (ty == Ity_I8 || ty == Ity_I16) {
1432          HReg dst = newVRegI(env);
1433          addInstr(env, X86Instr_LoadEX(
1434                           toUChar(ty==Ity_I8 ? 1 : 2),
1435                           False,
1436                           X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1437                           dst));
1438          return dst;
1439       }
1440       break;
1441    }
1442 
1443    case Iex_GetI: {
1444       X86AMode* am
1445          = genGuestArrayOffset(
1446               env, e->Iex.GetI.descr,
1447                    e->Iex.GetI.ix, e->Iex.GetI.bias );
1448       HReg dst = newVRegI(env);
1449       if (ty == Ity_I8) {
1450          addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1451          return dst;
1452       }
1453       if (ty == Ity_I32) {
1454          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1455          return dst;
1456       }
1457       break;
1458    }
1459 
1460    /* --------- CCALL --------- */
1461    case Iex_CCall: {
1462       HReg    dst = newVRegI(env);
1463       vassert(ty == e->Iex.CCall.retty);
1464 
1465       /* be very restrictive for now.  Only 32/64-bit ints allowed for
1466          args, and 32 bits for return type.  Don't forget to change
1467          the RetLoc if more return types are allowed in future. */
1468       if (e->Iex.CCall.retty != Ity_I32)
1469          goto irreducible;
1470 
1471       /* Marshal args, do the call, clear stack. */
1472       UInt   addToSp = 0;
1473       RetLoc rloc    = mk_RetLoc_INVALID();
1474       doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1475                     e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
1476       vassert(is_sane_RetLoc(rloc));
1477       vassert(rloc.pri == RLPri_Int);
1478       vassert(addToSp == 0);
1479 
1480       addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1481       return dst;
1482    }
1483 
1484    /* --------- LITERAL --------- */
1485    /* 32/16/8-bit literals */
1486    case Iex_Const: {
1487       X86RMI* rmi = iselIntExpr_RMI ( env, e );
1488       HReg    r   = newVRegI(env);
1489       addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r));
1490       return r;
1491    }
1492 
1493    /* --------- MULTIPLEX --------- */
1494    case Iex_ITE: { // VFD
1495      if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
1496          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
1497         HReg   r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
1498         X86RM* r0  = iselIntExpr_RM(env, e->Iex.ITE.iffalse);
1499         HReg   dst = newVRegI(env);
1500         addInstr(env, mk_iMOVsd_RR(r1,dst));
1501         X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
1502         addInstr(env, X86Instr_CMov32(cc ^ 1, r0, dst));
1503         return dst;
1504       }
1505       break;
1506    }
1507 
1508    default:
1509    break;
1510    } /* switch (e->tag) */
1511 
1512    /* We get here if no pattern matched. */
1513   irreducible:
1514    ppIRExpr(e);
1515    vpanic("iselIntExpr_R: cannot reduce tree");
1516 }
1517 
1518 
1519 /*---------------------------------------------------------*/
1520 /*--- ISEL: Integer expression auxiliaries              ---*/
1521 /*---------------------------------------------------------*/
1522 
1523 /* --------------------- AMODEs --------------------- */
1524 
1525 /* Return an AMode which computes the value of the specified
1526    expression, possibly also adding insns to the code list as a
1527    result.  The expression may only be a 32-bit one.
1528 */
1529 
sane_AMode(X86AMode * am)1530 static Bool sane_AMode ( X86AMode* am )
1531 {
1532    switch (am->tag) {
1533       case Xam_IR:
1534          return
1535             toBool( hregClass(am->Xam.IR.reg) == HRcInt32
1536                     && (hregIsVirtual(am->Xam.IR.reg)
1537                         || sameHReg(am->Xam.IR.reg, hregX86_EBP())) );
1538       case Xam_IRRS:
1539          return
1540             toBool( hregClass(am->Xam.IRRS.base) == HRcInt32
1541                     && hregIsVirtual(am->Xam.IRRS.base)
1542                     && hregClass(am->Xam.IRRS.index) == HRcInt32
1543                     && hregIsVirtual(am->Xam.IRRS.index) );
1544       default:
1545         vpanic("sane_AMode: unknown x86 amode tag");
1546    }
1547 }
1548 
iselIntExpr_AMode(ISelEnv * env,const IRExpr * e)1549 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, const IRExpr* e )
1550 {
1551    X86AMode* am = iselIntExpr_AMode_wrk(env, e);
1552    vassert(sane_AMode(am));
1553    return am;
1554 }
1555 
1556 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_AMode_wrk(ISelEnv * env,const IRExpr * e)1557 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e )
1558 {
1559    IRType ty = typeOfIRExpr(env->type_env,e);
1560    vassert(ty == Ity_I32);
1561 
1562    /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */
1563    if (e->tag == Iex_Binop
1564        && e->Iex.Binop.op == Iop_Add32
1565        && e->Iex.Binop.arg2->tag == Iex_Const
1566        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
1567        && e->Iex.Binop.arg1->tag == Iex_Binop
1568        && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32
1569        && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop
1570        && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1571        && e->Iex.Binop.arg1
1572            ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1573        && e->Iex.Binop.arg1
1574            ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1575       UInt shift = e->Iex.Binop.arg1
1576                     ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1577       UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1578       if (shift == 1 || shift == 2 || shift == 3) {
1579          HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1);
1580          HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1
1581                                        ->Iex.Binop.arg2->Iex.Binop.arg1 );
1582          return X86AMode_IRRS(imm32, r1, r2, shift);
1583       }
1584    }
1585 
1586    /* Add32(expr1, Shl32(expr2, imm)) */
1587    if (e->tag == Iex_Binop
1588        && e->Iex.Binop.op == Iop_Add32
1589        && e->Iex.Binop.arg2->tag == Iex_Binop
1590        && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1591        && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1592        && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1593       UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1594       if (shift == 1 || shift == 2 || shift == 3) {
1595          HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1596          HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
1597          return X86AMode_IRRS(0, r1, r2, shift);
1598       }
1599    }
1600 
1601    /* Add32(expr,i) */
1602    if (e->tag == Iex_Binop
1603        && e->Iex.Binop.op == Iop_Add32
1604        && e->Iex.Binop.arg2->tag == Iex_Const
1605        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
1606       HReg r1 = iselIntExpr_R(env,  e->Iex.Binop.arg1);
1607       return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1);
1608    }
1609 
1610    /* Doesn't match anything in particular.  Generate it into
1611       a register and use that. */
1612    {
1613       HReg r1 = iselIntExpr_R(env, e);
1614       return X86AMode_IR(0, r1);
1615    }
1616 }
1617 
1618 
1619 /* --------------------- RMIs --------------------- */
1620 
1621 /* Similarly, calculate an expression into an X86RMI operand.  As with
1622    iselIntExpr_R, the expression can have type 32, 16 or 8 bits.  */
1623 
iselIntExpr_RMI(ISelEnv * env,const IRExpr * e)1624 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, const IRExpr* e )
1625 {
1626    X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);
1627    /* sanity checks ... */
1628    switch (rmi->tag) {
1629       case Xrmi_Imm:
1630          return rmi;
1631       case Xrmi_Reg:
1632          vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
1633          vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
1634          return rmi;
1635       case Xrmi_Mem:
1636          vassert(sane_AMode(rmi->Xrmi.Mem.am));
1637          return rmi;
1638       default:
1639          vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
1640    }
1641 }
1642 
1643 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RMI_wrk(ISelEnv * env,const IRExpr * e)1644 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e )
1645 {
1646    IRType ty = typeOfIRExpr(env->type_env,e);
1647    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1648 
1649    /* special case: immediate */
1650    if (e->tag == Iex_Const) {
1651       UInt u;
1652       switch (e->Iex.Const.con->tag) {
1653          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1654          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1655          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1656          default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
1657       }
1658       return X86RMI_Imm(u);
1659    }
1660 
1661    /* special case: 32-bit GET */
1662    if (e->tag == Iex_Get && ty == Ity_I32) {
1663       return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1664                                     hregX86_EBP()));
1665    }
1666 
1667    /* special case: 32-bit load from memory */
1668    if (e->tag == Iex_Load && ty == Ity_I32
1669        && e->Iex.Load.end == Iend_LE) {
1670       X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
1671       return X86RMI_Mem(am);
1672    }
1673 
1674    /* default case: calculate into a register and return that */
1675    {
1676       HReg r = iselIntExpr_R ( env, e );
1677       return X86RMI_Reg(r);
1678    }
1679 }
1680 
1681 
1682 /* --------------------- RIs --------------------- */
1683 
1684 /* Calculate an expression into an X86RI operand.  As with
1685    iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1686 
iselIntExpr_RI(ISelEnv * env,const IRExpr * e)1687 static X86RI* iselIntExpr_RI ( ISelEnv* env, const IRExpr* e )
1688 {
1689    X86RI* ri = iselIntExpr_RI_wrk(env, e);
1690    /* sanity checks ... */
1691    switch (ri->tag) {
1692       case Xri_Imm:
1693          return ri;
1694       case Xri_Reg:
1695          vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
1696          vassert(hregIsVirtual(ri->Xri.Reg.reg));
1697          return ri;
1698       default:
1699          vpanic("iselIntExpr_RI: unknown x86 RI tag");
1700    }
1701 }
1702 
1703 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RI_wrk(ISelEnv * env,const IRExpr * e)1704 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e )
1705 {
1706    IRType ty = typeOfIRExpr(env->type_env,e);
1707    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1708 
1709    /* special case: immediate */
1710    if (e->tag == Iex_Const) {
1711       UInt u;
1712       switch (e->Iex.Const.con->tag) {
1713          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1714          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1715          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1716          default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
1717       }
1718       return X86RI_Imm(u);
1719    }
1720 
1721    /* default case: calculate into a register and return that */
1722    {
1723       HReg r = iselIntExpr_R ( env, e );
1724       return X86RI_Reg(r);
1725    }
1726 }
1727 
1728 
1729 /* --------------------- RMs --------------------- */
1730 
1731 /* Similarly, calculate an expression into an X86RM operand.  As with
1732    iselIntExpr_R, the expression can have type 32, 16 or 8 bits.  */
1733 
iselIntExpr_RM(ISelEnv * env,const IRExpr * e)1734 static X86RM* iselIntExpr_RM ( ISelEnv* env, const IRExpr* e )
1735 {
1736    X86RM* rm = iselIntExpr_RM_wrk(env, e);
1737    /* sanity checks ... */
1738    switch (rm->tag) {
1739       case Xrm_Reg:
1740          vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
1741          vassert(hregIsVirtual(rm->Xrm.Reg.reg));
1742          return rm;
1743       case Xrm_Mem:
1744          vassert(sane_AMode(rm->Xrm.Mem.am));
1745          return rm;
1746       default:
1747          vpanic("iselIntExpr_RM: unknown x86 RM tag");
1748    }
1749 }
1750 
1751 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RM_wrk(ISelEnv * env,const IRExpr * e)1752 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e )
1753 {
1754    IRType ty = typeOfIRExpr(env->type_env,e);
1755    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1756 
1757    /* special case: 32-bit GET */
1758    if (e->tag == Iex_Get && ty == Ity_I32) {
1759       return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
1760                                    hregX86_EBP()));
1761    }
1762 
1763    /* special case: load from memory */
1764 
1765    /* default case: calculate into a register and return that */
1766    {
1767       HReg r = iselIntExpr_R ( env, e );
1768       return X86RM_Reg(r);
1769    }
1770 }
1771 
1772 
1773 /* --------------------- CONDCODE --------------------- */
1774 
1775 /* Generate code to evaluated a bit-typed expression, returning the
1776    condition code which would correspond when the expression would
1777    notionally have returned 1. */
1778 
iselCondCode(ISelEnv * env,const IRExpr * e)1779 static X86CondCode iselCondCode ( ISelEnv* env, const IRExpr* e )
1780 {
1781    /* Uh, there's nothing we can sanity check here, unfortunately. */
1782    return iselCondCode_wrk(env,e);
1783 }
1784 
1785 /* DO NOT CALL THIS DIRECTLY ! */
iselCondCode_wrk(ISelEnv * env,const IRExpr * e)1786 static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e )
1787 {
1788    MatchInfo mi;
1789 
1790    vassert(e);
1791    vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1792 
1793    /* var */
1794    if (e->tag == Iex_RdTmp) {
1795       HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1796       /* Test32 doesn't modify r32; so this is OK. */
1797       addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
1798       return Xcc_NZ;
1799    }
1800 
1801    /* Constant 1:Bit */
1802    if (e->tag == Iex_Const) {
1803       HReg r;
1804       vassert(e->Iex.Const.con->tag == Ico_U1);
1805       vassert(e->Iex.Const.con->Ico.U1 == True
1806               || e->Iex.Const.con->Ico.U1 == False);
1807       r = newVRegI(env);
1808       addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
1809       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
1810       return e->Iex.Const.con->Ico.U1 ? Xcc_Z : Xcc_NZ;
1811    }
1812 
1813    /* Not1(e) */
1814    if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1815       /* Generate code for the arg, and negate the test condition */
1816       return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
1817    }
1818 
1819    /* --- patterns rooted at: 32to1 --- */
1820 
1821    if (e->tag == Iex_Unop
1822        && e->Iex.Unop.op == Iop_32to1) {
1823       X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1824       addInstr(env, X86Instr_Test32(1,rm));
1825       return Xcc_NZ;
1826    }
1827 
1828    /* --- patterns rooted at: CmpNEZ8 --- */
1829 
1830    /* CmpNEZ8(x) */
1831    if (e->tag == Iex_Unop
1832        && e->Iex.Unop.op == Iop_CmpNEZ8) {
1833       X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1834       addInstr(env, X86Instr_Test32(0xFF,rm));
1835       return Xcc_NZ;
1836    }
1837 
1838    /* --- patterns rooted at: CmpNEZ16 --- */
1839 
1840    /* CmpNEZ16(x) */
1841    if (e->tag == Iex_Unop
1842        && e->Iex.Unop.op == Iop_CmpNEZ16) {
1843       X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1844       addInstr(env, X86Instr_Test32(0xFFFF,rm));
1845       return Xcc_NZ;
1846    }
1847 
1848    /* --- patterns rooted at: CmpNEZ32 --- */
1849 
1850    /* CmpNEZ32(And32(x,y)) */
1851    {
1852       DECLARE_PATTERN(p_CmpNEZ32_And32);
1853       DEFINE_PATTERN(p_CmpNEZ32_And32,
1854                      unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
1855       if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
1856          HReg    r0   = iselIntExpr_R(env, mi.bindee[0]);
1857          X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1858          HReg    tmp  = newVRegI(env);
1859          addInstr(env, mk_iMOVsd_RR(r0, tmp));
1860          addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
1861          return Xcc_NZ;
1862       }
1863    }
1864 
1865    /* CmpNEZ32(Or32(x,y)) */
1866    {
1867       DECLARE_PATTERN(p_CmpNEZ32_Or32);
1868       DEFINE_PATTERN(p_CmpNEZ32_Or32,
1869                      unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
1870       if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
1871          HReg    r0   = iselIntExpr_R(env, mi.bindee[0]);
1872          X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1873          HReg    tmp  = newVRegI(env);
1874          addInstr(env, mk_iMOVsd_RR(r0, tmp));
1875          addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
1876          return Xcc_NZ;
1877       }
1878    }
1879 
1880    /* CmpNEZ32(GET(..):I32) */
1881    if (e->tag == Iex_Unop
1882        && e->Iex.Unop.op == Iop_CmpNEZ32
1883        && e->Iex.Unop.arg->tag == Iex_Get) {
1884       X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1885                                  hregX86_EBP());
1886       addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am));
1887       return Xcc_NZ;
1888    }
1889 
1890    /* CmpNEZ32(x) */
1891    if (e->tag == Iex_Unop
1892        && e->Iex.Unop.op == Iop_CmpNEZ32) {
1893       HReg    r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1894       X86RMI* rmi2 = X86RMI_Imm(0);
1895       addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
1896       return Xcc_NZ;
1897    }
1898 
1899    /* --- patterns rooted at: CmpNEZ64 --- */
1900 
1901    /* CmpNEZ64(Or64(x,y)) */
1902    {
1903       DECLARE_PATTERN(p_CmpNEZ64_Or64);
1904       DEFINE_PATTERN(p_CmpNEZ64_Or64,
1905                      unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
1906       if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
1907          HReg    hi1, lo1, hi2, lo2;
1908          HReg    tmp  = newVRegI(env);
1909          iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
1910          addInstr(env, mk_iMOVsd_RR(hi1, tmp));
1911          addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
1912          iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
1913          addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
1914          addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
1915          return Xcc_NZ;
1916       }
1917    }
1918 
1919    /* CmpNEZ64(x) */
1920    if (e->tag == Iex_Unop
1921        && e->Iex.Unop.op == Iop_CmpNEZ64) {
1922       HReg hi, lo;
1923       HReg tmp = newVRegI(env);
1924       iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
1925       addInstr(env, mk_iMOVsd_RR(hi, tmp));
1926       addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
1927       return Xcc_NZ;
1928    }
1929 
1930    /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */
1931 
1932    /* CmpEQ8 / CmpNE8 */
1933    if (e->tag == Iex_Binop
1934        && (e->Iex.Binop.op == Iop_CmpEQ8
1935            || e->Iex.Binop.op == Iop_CmpNE8
1936            || e->Iex.Binop.op == Iop_CasCmpEQ8
1937            || e->Iex.Binop.op == Iop_CasCmpNE8)) {
1938       if (isZeroU8(e->Iex.Binop.arg2)) {
1939          HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1940          addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
1941          switch (e->Iex.Binop.op) {
1942             case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1943             case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1944             default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
1945          }
1946       } else {
1947          HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1948          X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1949          HReg    r    = newVRegI(env);
1950          addInstr(env, mk_iMOVsd_RR(r1,r));
1951          addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1952          addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
1953          switch (e->Iex.Binop.op) {
1954             case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1955             case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1956             default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
1957          }
1958       }
1959    }
1960 
1961    /* CmpEQ16 / CmpNE16 */
1962    if (e->tag == Iex_Binop
1963        && (e->Iex.Binop.op == Iop_CmpEQ16
1964            || e->Iex.Binop.op == Iop_CmpNE16
1965            || e->Iex.Binop.op == Iop_CasCmpEQ16
1966            || e->Iex.Binop.op == Iop_CasCmpNE16
1967            || e->Iex.Binop.op == Iop_ExpCmpNE16)) {
1968       HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1969       X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1970       HReg    r    = newVRegI(env);
1971       addInstr(env, mk_iMOVsd_RR(r1,r));
1972       addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1973       addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
1974       switch (e->Iex.Binop.op) {
1975          case Iop_CmpEQ16: case Iop_CasCmpEQ16:
1976             return Xcc_Z;
1977          case Iop_CmpNE16: case Iop_CasCmpNE16: case Iop_ExpCmpNE16:
1978             return Xcc_NZ;
1979          default:
1980             vpanic("iselCondCode(x86): CmpXX16");
1981       }
1982    }
1983 
1984    /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation).
1985       Saves a "movl %eax, %tmp" compared to the default route. */
1986    if (e->tag == Iex_Binop
1987        && e->Iex.Binop.op == Iop_CmpNE32
1988        && e->Iex.Binop.arg1->tag == Iex_CCall
1989        && e->Iex.Binop.arg2->tag == Iex_Const) {
1990       IRExpr* cal = e->Iex.Binop.arg1;
1991       IRExpr* con = e->Iex.Binop.arg2;
1992       /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
1993       vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
1994       vassert(con->Iex.Const.con->tag == Ico_U32);
1995       /* Marshal args, do the call. */
1996       UInt   addToSp = 0;
1997       RetLoc rloc    = mk_RetLoc_INVALID();
1998       doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1999                     cal->Iex.CCall.cee,
2000                     cal->Iex.CCall.retty, cal->Iex.CCall.args );
2001       vassert(is_sane_RetLoc(rloc));
2002       vassert(rloc.pri == RLPri_Int);
2003       vassert(addToSp == 0);
2004       /* */
2005       addInstr(env, X86Instr_Alu32R(Xalu_CMP,
2006                                     X86RMI_Imm(con->Iex.Const.con->Ico.U32),
2007                                     hregX86_EAX()));
2008       return Xcc_NZ;
2009    }
2010 
2011    /* Cmp*32*(x,y) */
2012    if (e->tag == Iex_Binop
2013        && (e->Iex.Binop.op == Iop_CmpEQ32
2014            || e->Iex.Binop.op == Iop_CmpNE32
2015            || e->Iex.Binop.op == Iop_CmpLT32S
2016            || e->Iex.Binop.op == Iop_CmpLT32U
2017            || e->Iex.Binop.op == Iop_CmpLE32S
2018            || e->Iex.Binop.op == Iop_CmpLE32U
2019            || e->Iex.Binop.op == Iop_CasCmpEQ32
2020            || e->Iex.Binop.op == Iop_CasCmpNE32
2021            || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
2022       HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
2023       X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2024       addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
2025       switch (e->Iex.Binop.op) {
2026          case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
2027          case Iop_CmpNE32:
2028          case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Xcc_NZ;
2029          case Iop_CmpLT32S: return Xcc_L;
2030          case Iop_CmpLT32U: return Xcc_B;
2031          case Iop_CmpLE32S: return Xcc_LE;
2032          case Iop_CmpLE32U: return Xcc_BE;
2033          default: vpanic("iselCondCode(x86): CmpXX32");
2034       }
2035    }
2036 
2037    /* CmpNE64 */
2038    if (e->tag == Iex_Binop
2039        && (e->Iex.Binop.op == Iop_CmpNE64
2040            || e->Iex.Binop.op == Iop_CmpEQ64)) {
2041       HReg hi1, hi2, lo1, lo2;
2042       HReg tHi = newVRegI(env);
2043       HReg tLo = newVRegI(env);
2044       iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
2045       iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
2046       addInstr(env, mk_iMOVsd_RR(hi1, tHi));
2047       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
2048       addInstr(env, mk_iMOVsd_RR(lo1, tLo));
2049       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
2050       addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
2051       switch (e->Iex.Binop.op) {
2052          case Iop_CmpNE64: return Xcc_NZ;
2053          case Iop_CmpEQ64: return Xcc_Z;
2054          default: vpanic("iselCondCode(x86): CmpXX64");
2055       }
2056    }
2057 
2058    ppIRExpr(e);
2059    vpanic("iselCondCode");
2060 }
2061 
2062 
2063 /*---------------------------------------------------------*/
2064 /*--- ISEL: Integer expressions (64 bit)                ---*/
2065 /*---------------------------------------------------------*/
2066 
2067 /* Compute a 64-bit value into a register pair, which is returned as
2068    the first two parameters.  As with iselIntExpr_R, these may be
2069    either real or virtual regs; in any case they must not be changed
2070    by subsequent code emitted by the caller.  */
2071 
iselInt64Expr(HReg * rHi,HReg * rLo,ISelEnv * env,const IRExpr * e)2072 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
2073                             const IRExpr* e )
2074 {
2075    iselInt64Expr_wrk(rHi, rLo, env, e);
2076 #  if 0
2077    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2078 #  endif
2079    vassert(hregClass(*rHi) == HRcInt32);
2080    vassert(hregIsVirtual(*rHi));
2081    vassert(hregClass(*rLo) == HRcInt32);
2082    vassert(hregIsVirtual(*rLo));
2083 }
2084 
2085 /* DO NOT CALL THIS DIRECTLY ! */
iselInt64Expr_wrk(HReg * rHi,HReg * rLo,ISelEnv * env,const IRExpr * e)2086 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
2087                                 const IRExpr* e )
2088 {
2089    MatchInfo mi;
2090    HWord fn = 0; /* helper fn for most SIMD64 stuff */
2091    vassert(e);
2092    vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
2093 
2094    /* 64-bit literal */
2095    if (e->tag == Iex_Const) {
2096       ULong w64 = e->Iex.Const.con->Ico.U64;
2097       UInt  wHi = toUInt(w64 >> 32);
2098       UInt  wLo = toUInt(w64);
2099       HReg  tLo = newVRegI(env);
2100       HReg  tHi = newVRegI(env);
2101       vassert(e->Iex.Const.con->tag == Ico_U64);
2102       if (wLo == wHi) {
2103          /* Save a precious Int register in this special case. */
2104          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2105          *rHi = tLo;
2106          *rLo = tLo;
2107       } else {
2108          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
2109          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2110          *rHi = tHi;
2111          *rLo = tLo;
2112       }
2113       return;
2114    }
2115 
2116    /* read 64-bit IRTemp */
2117    if (e->tag == Iex_RdTmp) {
2118       lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
2119       return;
2120    }
2121 
2122    /* 64-bit load */
2123    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2124       HReg     tLo, tHi;
2125       X86AMode *am0, *am4;
2126       vassert(e->Iex.Load.ty == Ity_I64);
2127       tLo = newVRegI(env);
2128       tHi = newVRegI(env);
2129       am0 = iselIntExpr_AMode(env, e->Iex.Load.addr);
2130       am4 = advance4(am0);
2131       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
2132       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2133       *rHi = tHi;
2134       *rLo = tLo;
2135       return;
2136    }
2137 
2138    /* 64-bit GET */
2139    if (e->tag == Iex_Get) {
2140       X86AMode* am  = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
2141       X86AMode* am4 = advance4(am);
2142       HReg tLo = newVRegI(env);
2143       HReg tHi = newVRegI(env);
2144       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2145       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2146       *rHi = tHi;
2147       *rLo = tLo;
2148       return;
2149    }
2150 
2151    /* 64-bit GETI */
2152    if (e->tag == Iex_GetI) {
2153       X86AMode* am
2154          = genGuestArrayOffset( env, e->Iex.GetI.descr,
2155                                      e->Iex.GetI.ix, e->Iex.GetI.bias );
2156       X86AMode* am4 = advance4(am);
2157       HReg tLo = newVRegI(env);
2158       HReg tHi = newVRegI(env);
2159       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2160       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2161       *rHi = tHi;
2162       *rLo = tLo;
2163       return;
2164    }
2165 
2166    /* 64-bit ITE: ITE(g, expr, expr) */ // VFD
2167    if (e->tag == Iex_ITE) {
2168       HReg e0Lo, e0Hi, e1Lo, e1Hi;
2169       HReg tLo = newVRegI(env);
2170       HReg tHi = newVRegI(env);
2171       iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse);
2172       iselInt64Expr(&e1Hi, &e1Lo, env, e->Iex.ITE.iftrue);
2173       addInstr(env, mk_iMOVsd_RR(e1Hi, tHi));
2174       addInstr(env, mk_iMOVsd_RR(e1Lo, tLo));
2175       X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
2176       /* This assumes the first cmov32 doesn't trash the condition
2177          codes, so they are still available for the second cmov32 */
2178       addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Hi), tHi));
2179       addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Lo), tLo));
2180       *rHi = tHi;
2181       *rLo = tLo;
2182       return;
2183    }
2184 
2185    /* --------- BINARY ops --------- */
2186    if (e->tag == Iex_Binop) {
2187       switch (e->Iex.Binop.op) {
2188          /* 32 x 32 -> 64 multiply */
2189          case Iop_MullU32:
2190          case Iop_MullS32: {
2191             /* get one operand into %eax, and the other into a R/M.
2192                Need to make an educated guess about which is better in
2193                which. */
2194             HReg   tLo    = newVRegI(env);
2195             HReg   tHi    = newVRegI(env);
2196             Bool   syned  = toBool(e->Iex.Binop.op == Iop_MullS32);
2197             X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2198             HReg   rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2199             addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX()));
2200             addInstr(env, X86Instr_MulL(syned, rmLeft));
2201             /* Result is now in EDX:EAX.  Tell the caller. */
2202             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2203             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2204             *rHi = tHi;
2205             *rLo = tLo;
2206             return;
2207          }
2208 
2209          /* 64 x 32 -> (32(rem),32(div)) division */
2210          case Iop_DivModU64to32:
2211          case Iop_DivModS64to32: {
2212             /* Get the 64-bit operand into edx:eax, and the other into
2213                any old R/M. */
2214             HReg sHi, sLo;
2215             HReg   tLo     = newVRegI(env);
2216             HReg   tHi     = newVRegI(env);
2217             Bool   syned   = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
2218             X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2219             iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2220             addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX()));
2221             addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX()));
2222             addInstr(env, X86Instr_Div(syned, rmRight));
2223             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2224             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2225             *rHi = tHi;
2226             *rLo = tLo;
2227             return;
2228          }
2229 
2230          /* Or64/And64/Xor64 */
2231          case Iop_Or64:
2232          case Iop_And64:
2233          case Iop_Xor64: {
2234             HReg xLo, xHi, yLo, yHi;
2235             HReg tLo = newVRegI(env);
2236             HReg tHi = newVRegI(env);
2237             X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
2238                           : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
2239                           : Xalu_XOR;
2240             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2241             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2242             addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2243             addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
2244             addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2245             addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
2246             *rHi = tHi;
2247             *rLo = tLo;
2248             return;
2249          }
2250 
2251          /* Add64/Sub64 */
2252          case Iop_Add64:
2253             if (e->Iex.Binop.arg2->tag == Iex_Const) {
2254                /* special case Add64(e, const) */
2255                ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2256                UInt  wHi = toUInt(w64 >> 32);
2257                UInt  wLo = toUInt(w64);
2258                HReg  tLo = newVRegI(env);
2259                HReg  tHi = newVRegI(env);
2260                HReg  xLo, xHi;
2261                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64);
2262                iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2263                addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2264                addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2265                addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo));
2266                addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi));
2267                *rHi = tHi;
2268                *rLo = tLo;
2269                return;
2270             }
2271             /* else fall through to the generic case */
2272          case Iop_Sub64: {
2273             HReg xLo, xHi, yLo, yHi;
2274             HReg tLo = newVRegI(env);
2275             HReg tHi = newVRegI(env);
2276             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2277             addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2278             addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2279             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2280             if (e->Iex.Binop.op==Iop_Add64) {
2281                addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
2282                addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
2283             } else {
2284                addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2285                addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2286             }
2287             *rHi = tHi;
2288             *rLo = tLo;
2289             return;
2290          }
2291 
2292          /* 32HLto64(e1,e2) */
2293          case Iop_32HLto64:
2294             *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2295             *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2296             return;
2297 
2298          /* 64-bit shifts */
2299          case Iop_Shl64: {
2300             /* We use the same ingenious scheme as gcc.  Put the value
2301                to be shifted into %hi:%lo, and the shift amount into
2302                %cl.  Then (dsts on right, a la ATT syntax):
2303 
2304                shldl %cl, %lo, %hi   -- make %hi be right for the
2305                                      -- shift amt %cl % 32
2306                shll  %cl, %lo        -- make %lo be right for the
2307                                      -- shift amt %cl % 32
2308 
2309                Now, if (shift amount % 64) is in the range 32 .. 63,
2310                we have to do a fixup, which puts the result low half
2311                into the result high half, and zeroes the low half:
2312 
2313                testl $32, %ecx
2314 
2315                cmovnz %lo, %hi
2316                movl $0, %tmp         -- sigh; need yet another reg
2317                cmovnz %tmp, %lo
2318             */
2319             HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2320             tLo = newVRegI(env);
2321             tHi = newVRegI(env);
2322             tTemp = newVRegI(env);
2323             rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2324             iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2325             addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2326             addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2327             addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2328             /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
2329                and those regs are legitimately modifiable. */
2330             addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
2331             addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
2332             addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2333             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
2334             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2335             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
2336             *rHi = tHi;
2337             *rLo = tLo;
2338             return;
2339          }
2340 
2341          case Iop_Shr64: {
2342             /* We use the same ingenious scheme as gcc.  Put the value
2343                to be shifted into %hi:%lo, and the shift amount into
2344                %cl.  Then:
2345 
2346                shrdl %cl, %hi, %lo   -- make %lo be right for the
2347                                      -- shift amt %cl % 32
2348                shrl  %cl, %hi        -- make %hi be right for the
2349                                      -- shift amt %cl % 32
2350 
2351                Now, if (shift amount % 64) is in the range 32 .. 63,
2352                we have to do a fixup, which puts the result high half
2353                into the result low half, and zeroes the high half:
2354 
2355                testl $32, %ecx
2356 
2357                cmovnz %hi, %lo
2358                movl $0, %tmp         -- sigh; need yet another reg
2359                cmovnz %tmp, %hi
2360             */
2361             HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2362             tLo = newVRegI(env);
2363             tHi = newVRegI(env);
2364             tTemp = newVRegI(env);
2365             rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2366             iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2367             addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2368             addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2369             addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2370             /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
2371                and those regs are legitimately modifiable. */
2372             addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
2373             addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
2374             addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2375             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
2376             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2377             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
2378             *rHi = tHi;
2379             *rLo = tLo;
2380             return;
2381          }
2382 
2383          case Iop_Sar64: {
2384             /* gcc -O2 does the following.  I don't know how it works, but it
2385                does work.  Don't mess with it.  This is hard to test because the
2386                x86 front end doesn't create Iop_Sar64 for any x86 instruction,
2387                so it's impossible to write a test program that feeds values
2388                through Iop_Sar64 and prints their results.  The implementation
2389                here was tested by using psrlq on mmx registers -- that generates
2390                Iop_Shr64 -- and temporarily hacking the front end to generate
2391                Iop_Sar64 for that instruction instead.
2392 
2393                movl  %amount, %ecx
2394                movl  %srcHi,  %r1
2395                movl  %srcLo,  %r2
2396 
2397                movl   %r1, %r3
2398                sarl   %cl, %r3
2399                movl   %r2, %r4
2400                shrdl  %cl, %r1, %r4
2401                movl   %r3, %r2
2402                sarl   $31, %r2
2403                andl   $32, %ecx
2404                cmovne %r3, %r4   // = resLo
2405                cmovne %r2, %r3   // = resHi
2406             */
2407             HReg amount = iselIntExpr_R(env, e->Iex.Binop.arg2);
2408             HReg srcHi = INVALID_HREG, srcLo = INVALID_HREG;
2409             iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Binop.arg1);
2410             HReg r1 = newVRegI(env);
2411             HReg r2 = newVRegI(env);
2412             HReg r3 = newVRegI(env);
2413             HReg r4 = newVRegI(env);
2414             addInstr(env, mk_iMOVsd_RR(amount, hregX86_ECX()));
2415             addInstr(env, mk_iMOVsd_RR(srcHi, r1));
2416             addInstr(env, mk_iMOVsd_RR(srcLo, r2));
2417 
2418             addInstr(env, mk_iMOVsd_RR(r1, r3));
2419             addInstr(env, X86Instr_Sh32(Xsh_SAR, 0/*%cl*/, r3));
2420             addInstr(env, mk_iMOVsd_RR(r2, r4));
2421             addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, r1, r4));
2422             addInstr(env, mk_iMOVsd_RR(r3, r2));
2423             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, r2));
2424             addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(32),
2425                                                     hregX86_ECX()));
2426             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(r3), r4));
2427             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(r2), r3));
2428             *rHi = r3;
2429             *rLo = r4;
2430             return;
2431          }
2432 
2433          /* F64 -> I64 */
2434          /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2435             case.  Unfortunately I see no easy way to avoid the
2436             duplication. */
2437          case Iop_F64toI64S: {
2438             HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
2439             HReg tLo = newVRegI(env);
2440             HReg tHi = newVRegI(env);
2441 
2442             /* Used several times ... */
2443             /* Careful ... this sharing is only safe because
2444 	       zero_esp/four_esp do not hold any registers which the
2445 	       register allocator could attempt to swizzle later. */
2446             X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2447             X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2448 
2449             /* rf now holds the value to be converted, and rrm holds
2450                the rounding mode value, encoded as per the
2451                IRRoundingMode enum.  The first thing to do is set the
2452                FPU's rounding mode accordingly. */
2453 
2454             /* Create a space for the format conversion. */
2455             /* subl $8, %esp */
2456             sub_from_esp(env, 8);
2457 
2458             /* Set host rounding mode */
2459             set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2460 
2461             /* gistll %rf, 0(%esp) */
2462             addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
2463 
2464             /* movl 0(%esp), %dstLo */
2465             /* movl 4(%esp), %dstHi */
2466             addInstr(env, X86Instr_Alu32R(
2467                              Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2468             addInstr(env, X86Instr_Alu32R(
2469                              Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2470 
2471             /* Restore default FPU rounding. */
2472             set_FPU_rounding_default( env );
2473 
2474             /* addl $8, %esp */
2475             add_to_esp(env, 8);
2476 
2477             *rHi = tHi;
2478             *rLo = tLo;
2479             return;
2480          }
2481 
2482          case Iop_Add8x8:
2483             fn = (HWord)h_generic_calc_Add8x8; goto binnish;
2484          case Iop_Add16x4:
2485             fn = (HWord)h_generic_calc_Add16x4; goto binnish;
2486          case Iop_Add32x2:
2487             fn = (HWord)h_generic_calc_Add32x2; goto binnish;
2488 
2489          case Iop_Avg8Ux8:
2490             fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
2491          case Iop_Avg16Ux4:
2492             fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;
2493 
2494          case Iop_CmpEQ8x8:
2495             fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
2496          case Iop_CmpEQ16x4:
2497             fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
2498          case Iop_CmpEQ32x2:
2499             fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;
2500 
2501          case Iop_CmpGT8Sx8:
2502             fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
2503          case Iop_CmpGT16Sx4:
2504             fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
2505          case Iop_CmpGT32Sx2:
2506             fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;
2507 
2508          case Iop_InterleaveHI8x8:
2509             fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
2510          case Iop_InterleaveLO8x8:
2511             fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
2512          case Iop_InterleaveHI16x4:
2513             fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
2514          case Iop_InterleaveLO16x4:
2515             fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
2516          case Iop_InterleaveHI32x2:
2517             fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
2518          case Iop_InterleaveLO32x2:
2519             fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
2520          case Iop_CatOddLanes16x4:
2521             fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish;
2522          case Iop_CatEvenLanes16x4:
2523             fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish;
2524          case Iop_Perm8x8:
2525             fn = (HWord)h_generic_calc_Perm8x8; goto binnish;
2526 
2527          case Iop_Max8Ux8:
2528             fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
2529          case Iop_Max16Sx4:
2530             fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
2531          case Iop_Min8Ux8:
2532             fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
2533          case Iop_Min16Sx4:
2534             fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;
2535 
2536          case Iop_Mul16x4:
2537             fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
2538          case Iop_Mul32x2:
2539             fn = (HWord)h_generic_calc_Mul32x2; goto binnish;
2540          case Iop_MulHi16Sx4:
2541             fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
2542          case Iop_MulHi16Ux4:
2543             fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;
2544 
2545          case Iop_QAdd8Sx8:
2546             fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
2547          case Iop_QAdd16Sx4:
2548             fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
2549          case Iop_QAdd8Ux8:
2550             fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
2551          case Iop_QAdd16Ux4:
2552             fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
2553 
2554          case Iop_QNarrowBin32Sto16Sx4:
2555             fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish;
2556          case Iop_QNarrowBin16Sto8Sx8:
2557             fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish;
2558          case Iop_QNarrowBin16Sto8Ux8:
2559             fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish;
2560          case Iop_NarrowBin16to8x8:
2561             fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish;
2562          case Iop_NarrowBin32to16x4:
2563             fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish;
2564 
2565          case Iop_QSub8Sx8:
2566             fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
2567          case Iop_QSub16Sx4:
2568             fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
2569          case Iop_QSub8Ux8:
2570             fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish;
2571          case Iop_QSub16Ux4:
2572             fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish;
2573 
2574          case Iop_Sub8x8:
2575             fn = (HWord)h_generic_calc_Sub8x8; goto binnish;
2576          case Iop_Sub16x4:
2577             fn = (HWord)h_generic_calc_Sub16x4; goto binnish;
2578          case Iop_Sub32x2:
2579             fn = (HWord)h_generic_calc_Sub32x2; goto binnish;
2580 
2581          binnish: {
2582             /* Note: the following assumes all helpers are of
2583                signature
2584                   ULong fn ( ULong, ULong ), and they are
2585                not marked as regparm functions.
2586             */
2587             HReg xLo, xHi, yLo, yHi;
2588             HReg tLo = newVRegI(env);
2589             HReg tHi = newVRegI(env);
2590             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2591             addInstr(env, X86Instr_Push(X86RMI_Reg(yHi)));
2592             addInstr(env, X86Instr_Push(X86RMI_Reg(yLo)));
2593             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2594             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2595             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2596             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2597                                          0, mk_RetLoc_simple(RLPri_2Int) ));
2598             add_to_esp(env, 4*4);
2599             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2600             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2601             *rHi = tHi;
2602             *rLo = tLo;
2603             return;
2604          }
2605 
2606          case Iop_ShlN32x2:
2607             fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
2608          case Iop_ShlN16x4:
2609             fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
2610          case Iop_ShlN8x8:
2611             fn = (HWord)h_generic_calc_ShlN8x8;  goto shifty;
2612          case Iop_ShrN32x2:
2613             fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
2614          case Iop_ShrN16x4:
2615             fn = (HWord)h_generic_calc_ShrN16x4; goto shifty;
2616          case Iop_SarN32x2:
2617             fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
2618          case Iop_SarN16x4:
2619             fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
2620          case Iop_SarN8x8:
2621             fn = (HWord)h_generic_calc_SarN8x8;  goto shifty;
2622          shifty: {
2623             /* Note: the following assumes all helpers are of
2624                signature
2625                   ULong fn ( ULong, UInt ), and they are
2626                not marked as regparm functions.
2627             */
2628             HReg xLo, xHi;
2629             HReg tLo = newVRegI(env);
2630             HReg tHi = newVRegI(env);
2631             X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2632             addInstr(env, X86Instr_Push(y));
2633             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2634             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2635             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2636             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2637                                          0, mk_RetLoc_simple(RLPri_2Int) ));
2638             add_to_esp(env, 3*4);
2639             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2640             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2641             *rHi = tHi;
2642             *rLo = tLo;
2643             return;
2644          }
2645 
2646          default:
2647             break;
2648       }
2649    } /* if (e->tag == Iex_Binop) */
2650 
2651 
2652    /* --------- UNARY ops --------- */
2653    if (e->tag == Iex_Unop) {
2654       switch (e->Iex.Unop.op) {
2655 
2656          /* 32Sto64(e) */
2657          case Iop_32Sto64: {
2658             HReg tLo = newVRegI(env);
2659             HReg tHi = newVRegI(env);
2660             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2661             addInstr(env, mk_iMOVsd_RR(src,tHi));
2662             addInstr(env, mk_iMOVsd_RR(src,tLo));
2663             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi));
2664             *rHi = tHi;
2665             *rLo = tLo;
2666             return;
2667          }
2668 
2669          /* 32Uto64(e) */
2670          case Iop_32Uto64: {
2671             HReg tLo = newVRegI(env);
2672             HReg tHi = newVRegI(env);
2673             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2674             addInstr(env, mk_iMOVsd_RR(src,tLo));
2675             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2676             *rHi = tHi;
2677             *rLo = tLo;
2678             return;
2679          }
2680 
2681          /* 16Uto64(e) */
2682          case Iop_16Uto64: {
2683             HReg tLo = newVRegI(env);
2684             HReg tHi = newVRegI(env);
2685             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2686             addInstr(env, mk_iMOVsd_RR(src,tLo));
2687             addInstr(env, X86Instr_Alu32R(Xalu_AND,
2688                                           X86RMI_Imm(0xFFFF), tLo));
2689             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2690             *rHi = tHi;
2691             *rLo = tLo;
2692             return;
2693          }
2694 
2695          /* V128{HI}to64 */
2696          case Iop_V128HIto64:
2697          case Iop_V128to64: {
2698             Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
2699             HReg tLo = newVRegI(env);
2700             HReg tHi = newVRegI(env);
2701             HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
2702             X86AMode* esp0  = X86AMode_IR(0,     hregX86_ESP());
2703             X86AMode* espLO = X86AMode_IR(off,   hregX86_ESP());
2704             X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP());
2705             sub_from_esp(env, 16);
2706             addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
2707             addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2708                                            X86RMI_Mem(espLO), tLo ));
2709             addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2710                                            X86RMI_Mem(espHI), tHi ));
2711             add_to_esp(env, 16);
2712             *rHi = tHi;
2713             *rLo = tLo;
2714             return;
2715          }
2716 
2717          /* could do better than this, but for now ... */
2718          case Iop_1Sto64: {
2719             HReg tLo = newVRegI(env);
2720             HReg tHi = newVRegI(env);
2721             X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2722             addInstr(env, X86Instr_Set32(cond,tLo));
2723             addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo));
2724             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo));
2725             addInstr(env, mk_iMOVsd_RR(tLo, tHi));
2726             *rHi = tHi;
2727             *rLo = tLo;
2728             return;
2729          }
2730 
2731          /* Not64(e) */
2732          case Iop_Not64: {
2733             HReg tLo = newVRegI(env);
2734             HReg tHi = newVRegI(env);
2735             HReg sHi, sLo;
2736             iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
2737             addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2738             addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2739             addInstr(env, X86Instr_Unary32(Xun_NOT,tHi));
2740             addInstr(env, X86Instr_Unary32(Xun_NOT,tLo));
2741             *rHi = tHi;
2742             *rLo = tLo;
2743             return;
2744          }
2745 
2746          /* Left64(e) */
2747          case Iop_Left64: {
2748             HReg yLo, yHi;
2749             HReg tLo = newVRegI(env);
2750             HReg tHi = newVRegI(env);
2751             /* yHi:yLo = arg */
2752             iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2753             /* tLo = 0 - yLo, and set carry */
2754             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo));
2755             addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2756             /* tHi = 0 - yHi - carry */
2757             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2758             addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2759             /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
2760                back in, so as to give the final result
2761                tHi:tLo = arg | -arg. */
2762             addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo));
2763             addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi));
2764             *rHi = tHi;
2765             *rLo = tLo;
2766             return;
2767          }
2768 
2769          /* --- patterns rooted at: CmpwNEZ64 --- */
2770 
2771          /* CmpwNEZ64(e) */
2772          case Iop_CmpwNEZ64: {
2773 
2774          DECLARE_PATTERN(p_CmpwNEZ64_Or64);
2775          DEFINE_PATTERN(p_CmpwNEZ64_Or64,
2776                         unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1))));
2777          if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) {
2778             /* CmpwNEZ64(Or64(x,y)) */
2779             HReg xHi,xLo,yHi,yLo;
2780             HReg xBoth = newVRegI(env);
2781             HReg merged = newVRegI(env);
2782             HReg tmp2 = newVRegI(env);
2783 
2784             iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]);
2785             addInstr(env, mk_iMOVsd_RR(xHi,xBoth));
2786             addInstr(env, X86Instr_Alu32R(Xalu_OR,
2787                                           X86RMI_Reg(xLo),xBoth));
2788 
2789             iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]);
2790             addInstr(env, mk_iMOVsd_RR(yHi,merged));
2791             addInstr(env, X86Instr_Alu32R(Xalu_OR,
2792                                           X86RMI_Reg(yLo),merged));
2793             addInstr(env, X86Instr_Alu32R(Xalu_OR,
2794                                              X86RMI_Reg(xBoth),merged));
2795 
2796             /* tmp2 = (merged | -merged) >>s 31 */
2797             addInstr(env, mk_iMOVsd_RR(merged,tmp2));
2798             addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2799             addInstr(env, X86Instr_Alu32R(Xalu_OR,
2800                                           X86RMI_Reg(merged), tmp2));
2801             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2802             *rHi = tmp2;
2803             *rLo = tmp2;
2804             return;
2805          } else {
2806             /* CmpwNEZ64(e) */
2807             HReg srcLo, srcHi;
2808             HReg tmp1  = newVRegI(env);
2809             HReg tmp2  = newVRegI(env);
2810             /* srcHi:srcLo = arg */
2811             iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2812             /* tmp1 = srcHi | srcLo */
2813             addInstr(env, mk_iMOVsd_RR(srcHi,tmp1));
2814             addInstr(env, X86Instr_Alu32R(Xalu_OR,
2815                                           X86RMI_Reg(srcLo), tmp1));
2816             /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2817             addInstr(env, mk_iMOVsd_RR(tmp1,tmp2));
2818             addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2819             addInstr(env, X86Instr_Alu32R(Xalu_OR,
2820                                           X86RMI_Reg(tmp1), tmp2));
2821             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2822             *rHi = tmp2;
2823             *rLo = tmp2;
2824             return;
2825          }
2826          }
2827 
2828          /* ReinterpF64asI64(e) */
2829          /* Given an IEEE754 double, produce an I64 with the same bit
2830             pattern. */
2831          case Iop_ReinterpF64asI64: {
2832             HReg rf   = iselDblExpr(env, e->Iex.Unop.arg);
2833             HReg tLo  = newVRegI(env);
2834             HReg tHi  = newVRegI(env);
2835             X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2836             X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2837             /* paranoia */
2838             set_FPU_rounding_default(env);
2839             /* subl $8, %esp */
2840             sub_from_esp(env, 8);
2841             /* gstD %rf, 0(%esp) */
2842             addInstr(env,
2843                      X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp));
2844             /* movl 0(%esp), %tLo */
2845             addInstr(env,
2846                      X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2847             /* movl 4(%esp), %tHi */
2848             addInstr(env,
2849                      X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2850             /* addl $8, %esp */
2851             add_to_esp(env, 8);
2852             *rHi = tHi;
2853             *rLo = tLo;
2854             return;
2855          }
2856 
2857          case Iop_CmpNEZ32x2:
2858             fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish;
2859          case Iop_CmpNEZ16x4:
2860             fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish;
2861          case Iop_CmpNEZ8x8:
2862             fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish;
2863          unish: {
2864             /* Note: the following assumes all helpers are of
2865                signature
2866                   ULong fn ( ULong ), and they are
2867                not marked as regparm functions.
2868             */
2869             HReg xLo, xHi;
2870             HReg tLo = newVRegI(env);
2871             HReg tHi = newVRegI(env);
2872             iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
2873             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2874             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2875             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2876                                          0, mk_RetLoc_simple(RLPri_2Int) ));
2877             add_to_esp(env, 2*4);
2878             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2879             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2880             *rHi = tHi;
2881             *rLo = tLo;
2882             return;
2883          }
2884 
2885          default:
2886             break;
2887       }
2888    } /* if (e->tag == Iex_Unop) */
2889 
2890 
2891    /* --------- CCALL --------- */
2892    if (e->tag == Iex_CCall) {
2893       HReg tLo = newVRegI(env);
2894       HReg tHi = newVRegI(env);
2895 
2896       /* Marshal args, do the call, clear stack. */
2897       UInt   addToSp = 0;
2898       RetLoc rloc    = mk_RetLoc_INVALID();
2899       doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2900                     e->Iex.CCall.cee,
2901                     e->Iex.CCall.retty, e->Iex.CCall.args );
2902       vassert(is_sane_RetLoc(rloc));
2903       vassert(rloc.pri == RLPri_2Int);
2904       vassert(addToSp == 0);
2905       /* */
2906 
2907       addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2908       addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2909       *rHi = tHi;
2910       *rLo = tLo;
2911       return;
2912    }
2913 
2914    ppIRExpr(e);
2915    vpanic("iselInt64Expr");
2916 }
2917 
2918 
2919 /*---------------------------------------------------------*/
2920 /*--- ISEL: Floating point expressions (32 bit)         ---*/
2921 /*---------------------------------------------------------*/
2922 
2923 /* Nothing interesting here; really just wrappers for
2924    64-bit stuff. */
2925 
iselFltExpr(ISelEnv * env,const IRExpr * e)2926 static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e )
2927 {
2928    HReg r = iselFltExpr_wrk( env, e );
2929 #  if 0
2930    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2931 #  endif
2932    vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
2933    vassert(hregIsVirtual(r));
2934    return r;
2935 }
2936 
2937 /* DO NOT CALL THIS DIRECTLY */
iselFltExpr_wrk(ISelEnv * env,const IRExpr * e)2938 static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e )
2939 {
2940    IRType ty = typeOfIRExpr(env->type_env,e);
2941    vassert(ty == Ity_F32);
2942 
2943    if (e->tag == Iex_RdTmp) {
2944       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2945    }
2946 
2947    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2948       X86AMode* am;
2949       HReg res = newVRegF(env);
2950       vassert(e->Iex.Load.ty == Ity_F32);
2951       am = iselIntExpr_AMode(env, e->Iex.Load.addr);
2952       addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am));
2953       return res;
2954    }
2955 
2956    if (e->tag == Iex_Binop
2957        && e->Iex.Binop.op == Iop_F64toF32) {
2958       /* Although the result is still held in a standard FPU register,
2959          we need to round it to reflect the loss of accuracy/range
2960          entailed in casting it to a 32-bit float. */
2961       HReg dst = newVRegF(env);
2962       HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2963       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2964       addInstr(env, X86Instr_Fp64to32(src,dst));
2965       set_FPU_rounding_default( env );
2966       return dst;
2967    }
2968 
2969    if (e->tag == Iex_Get) {
2970       X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
2971                                   hregX86_EBP() );
2972       HReg res = newVRegF(env);
2973       addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am ));
2974       return res;
2975    }
2976 
2977    if (e->tag == Iex_Unop
2978        && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
2979        /* Given an I32, produce an IEEE754 float with the same bit
2980           pattern. */
2981       HReg    dst = newVRegF(env);
2982       X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
2983       /* paranoia */
2984       addInstr(env, X86Instr_Push(rmi));
2985       addInstr(env, X86Instr_FpLdSt(
2986                        True/*load*/, 4, dst,
2987                        X86AMode_IR(0, hregX86_ESP())));
2988       add_to_esp(env, 4);
2989       return dst;
2990    }
2991 
2992    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
2993       HReg rf  = iselFltExpr(env, e->Iex.Binop.arg2);
2994       HReg dst = newVRegF(env);
2995 
2996       /* rf now holds the value to be rounded.  The first thing to do
2997          is set the FPU's rounding mode accordingly. */
2998 
2999       /* Set host rounding mode */
3000       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3001 
3002       /* grndint %rf, %dst */
3003       addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
3004 
3005       /* Restore default FPU rounding. */
3006       set_FPU_rounding_default( env );
3007 
3008       return dst;
3009    }
3010 
3011    ppIRExpr(e);
3012    vpanic("iselFltExpr_wrk");
3013 }
3014 
3015 
3016 /*---------------------------------------------------------*/
3017 /*--- ISEL: Floating point expressions (64 bit)         ---*/
3018 /*---------------------------------------------------------*/
3019 
3020 /* Compute a 64-bit floating point value into a register, the identity
3021    of which is returned.  As with iselIntExpr_R, the reg may be either
3022    real or virtual; in any case it must not be changed by subsequent
3023    code emitted by the caller.  */
3024 
3025 /* IEEE 754 formats.  From http://www.freesoft.org/CIE/RFC/1832/32.htm:
3026 
3027     Type                  S (1 bit)   E (11 bits)   F (52 bits)
3028     ----                  ---------   -----------   -----------
3029     signalling NaN        u           2047 (max)    .0uuuuu---u
3030                                                     (with at least
3031                                                      one 1 bit)
3032     quiet NaN             u           2047 (max)    .1uuuuu---u
3033 
3034     negative infinity     1           2047 (max)    .000000---0
3035 
3036     positive infinity     0           2047 (max)    .000000---0
3037 
3038     negative zero         1           0             .000000---0
3039 
3040     positive zero         0           0             .000000---0
3041 */
3042 
iselDblExpr(ISelEnv * env,const IRExpr * e)3043 static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e )
3044 {
3045    HReg r = iselDblExpr_wrk( env, e );
3046 #  if 0
3047    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3048 #  endif
3049    vassert(hregClass(r) == HRcFlt64);
3050    vassert(hregIsVirtual(r));
3051    return r;
3052 }
3053 
3054 /* DO NOT CALL THIS DIRECTLY */
iselDblExpr_wrk(ISelEnv * env,const IRExpr * e)3055 static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e )
3056 {
3057    IRType ty = typeOfIRExpr(env->type_env,e);
3058    vassert(e);
3059    vassert(ty == Ity_F64);
3060 
3061    if (e->tag == Iex_RdTmp) {
3062       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3063    }
3064 
3065    if (e->tag == Iex_Const) {
3066       union { UInt u32x2[2]; ULong u64; Double f64; } u;
3067       HReg freg = newVRegF(env);
3068       vassert(sizeof(u) == 8);
3069       vassert(sizeof(u.u64) == 8);
3070       vassert(sizeof(u.f64) == 8);
3071       vassert(sizeof(u.u32x2) == 8);
3072 
3073       if (e->Iex.Const.con->tag == Ico_F64) {
3074          u.f64 = e->Iex.Const.con->Ico.F64;
3075       }
3076       else if (e->Iex.Const.con->tag == Ico_F64i) {
3077          u.u64 = e->Iex.Const.con->Ico.F64i;
3078       }
3079       else
3080          vpanic("iselDblExpr(x86): const");
3081 
3082       addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1])));
3083       addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0])));
3084       addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg,
3085                                     X86AMode_IR(0, hregX86_ESP())));
3086       add_to_esp(env, 8);
3087       return freg;
3088    }
3089 
3090    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3091       X86AMode* am;
3092       HReg res = newVRegF(env);
3093       vassert(e->Iex.Load.ty == Ity_F64);
3094       am = iselIntExpr_AMode(env, e->Iex.Load.addr);
3095       addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am));
3096       return res;
3097    }
3098 
3099    if (e->tag == Iex_Get) {
3100       X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
3101                                   hregX86_EBP() );
3102       HReg res = newVRegF(env);
3103       addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
3104       return res;
3105    }
3106 
3107    if (e->tag == Iex_GetI) {
3108       X86AMode* am
3109          = genGuestArrayOffset(
3110               env, e->Iex.GetI.descr,
3111                    e->Iex.GetI.ix, e->Iex.GetI.bias );
3112       HReg res = newVRegF(env);
3113       addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
3114       return res;
3115    }
3116 
3117    if (e->tag == Iex_Triop) {
3118       X86FpOp fpop = Xfp_INVALID;
3119       IRTriop *triop = e->Iex.Triop.details;
3120       switch (triop->op) {
3121          case Iop_AddF64:    fpop = Xfp_ADD; break;
3122          case Iop_SubF64:    fpop = Xfp_SUB; break;
3123          case Iop_MulF64:    fpop = Xfp_MUL; break;
3124          case Iop_DivF64:    fpop = Xfp_DIV; break;
3125          case Iop_ScaleF64:  fpop = Xfp_SCALE; break;
3126          case Iop_Yl2xF64:   fpop = Xfp_YL2X; break;
3127          case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
3128          case Iop_AtanF64:   fpop = Xfp_ATAN; break;
3129          case Iop_PRemF64:   fpop = Xfp_PREM; break;
3130          case Iop_PRem1F64:  fpop = Xfp_PREM1; break;
3131          default: break;
3132       }
3133       if (fpop != Xfp_INVALID) {
3134          HReg res  = newVRegF(env);
3135          HReg srcL = iselDblExpr(env, triop->arg2);
3136          HReg srcR = iselDblExpr(env, triop->arg3);
3137          /* XXXROUNDINGFIXME */
3138          /* set roundingmode here */
3139          addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
3140 	 if (fpop != Xfp_ADD && fpop != Xfp_SUB
3141 	     && fpop != Xfp_MUL && fpop != Xfp_DIV)
3142             roundToF64(env, res);
3143          return res;
3144       }
3145    }
3146 
3147    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
3148       HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
3149       HReg dst = newVRegF(env);
3150 
3151       /* rf now holds the value to be rounded.  The first thing to do
3152          is set the FPU's rounding mode accordingly. */
3153 
3154       /* Set host rounding mode */
3155       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3156 
3157       /* grndint %rf, %dst */
3158       addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
3159 
3160       /* Restore default FPU rounding. */
3161       set_FPU_rounding_default( env );
3162 
3163       return dst;
3164    }
3165 
3166    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
3167       HReg dst = newVRegF(env);
3168       HReg rHi,rLo;
3169       iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2);
3170       addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3171       addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3172 
3173       /* Set host rounding mode */
3174       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3175 
3176       addInstr(env, X86Instr_FpLdStI(
3177                        True/*load*/, 8, dst,
3178                        X86AMode_IR(0, hregX86_ESP())));
3179 
3180       /* Restore default FPU rounding. */
3181       set_FPU_rounding_default( env );
3182 
3183       add_to_esp(env, 8);
3184       return dst;
3185    }
3186 
3187    if (e->tag == Iex_Binop) {
3188       X86FpOp fpop = Xfp_INVALID;
3189       switch (e->Iex.Binop.op) {
3190          case Iop_SinF64:  fpop = Xfp_SIN; break;
3191          case Iop_CosF64:  fpop = Xfp_COS; break;
3192          case Iop_TanF64:  fpop = Xfp_TAN; break;
3193          case Iop_2xm1F64: fpop = Xfp_2XM1; break;
3194          case Iop_SqrtF64: fpop = Xfp_SQRT; break;
3195          default: break;
3196       }
3197       if (fpop != Xfp_INVALID) {
3198          HReg res = newVRegF(env);
3199          HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3200          /* XXXROUNDINGFIXME */
3201          /* set roundingmode here */
3202          /* Note that X86Instr_FpUnary(Xfp_TAN,..) sets the condition
3203             codes.  I don't think that matters, since this insn
3204             selector never generates such an instruction intervening
3205             between an flag-setting instruction and a flag-using
3206             instruction. */
3207          addInstr(env, X86Instr_FpUnary(fpop,src,res));
3208 	 if (fpop != Xfp_SQRT
3209              && fpop != Xfp_NEG && fpop != Xfp_ABS)
3210             roundToF64(env, res);
3211          return res;
3212       }
3213    }
3214 
3215    if (e->tag == Iex_Unop) {
3216       X86FpOp fpop = Xfp_INVALID;
3217       switch (e->Iex.Unop.op) {
3218          case Iop_NegF64:  fpop = Xfp_NEG; break;
3219          case Iop_AbsF64:  fpop = Xfp_ABS; break;
3220          default: break;
3221       }
3222       if (fpop != Xfp_INVALID) {
3223          HReg res = newVRegF(env);
3224          HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3225          addInstr(env, X86Instr_FpUnary(fpop,src,res));
3226          /* No need to do roundToF64(env,res) for Xfp_NEG or Xfp_ABS,
3227             but might need to do that for other unary ops. */
3228          return res;
3229       }
3230    }
3231 
3232    if (e->tag == Iex_Unop) {
3233       switch (e->Iex.Unop.op) {
3234          case Iop_I32StoF64: {
3235             HReg dst = newVRegF(env);
3236             HReg ri  = iselIntExpr_R(env, e->Iex.Unop.arg);
3237             addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
3238             set_FPU_rounding_default(env);
3239             addInstr(env, X86Instr_FpLdStI(
3240                              True/*load*/, 4, dst,
3241                              X86AMode_IR(0, hregX86_ESP())));
3242 	    add_to_esp(env, 4);
3243             return dst;
3244          }
3245          case Iop_ReinterpI64asF64: {
3246             /* Given an I64, produce an IEEE754 double with the same
3247                bit pattern. */
3248             HReg dst = newVRegF(env);
3249             HReg rHi, rLo;
3250 	    iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg);
3251             /* paranoia */
3252             set_FPU_rounding_default(env);
3253             addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3254             addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3255             addInstr(env, X86Instr_FpLdSt(
3256                              True/*load*/, 8, dst,
3257                              X86AMode_IR(0, hregX86_ESP())));
3258 	    add_to_esp(env, 8);
3259             return dst;
3260 	 }
3261          case Iop_F32toF64: {
3262             /* this is a no-op */
3263             HReg res = iselFltExpr(env, e->Iex.Unop.arg);
3264             return res;
3265 	 }
3266          default:
3267             break;
3268       }
3269    }
3270 
3271    /* --------- MULTIPLEX --------- */
3272    if (e->tag == Iex_ITE) { // VFD
3273      if (ty == Ity_F64
3274          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
3275         HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
3276         HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
3277         HReg dst = newVRegF(env);
3278         addInstr(env, X86Instr_FpUnary(Xfp_MOV,r1,dst));
3279         X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
3280         addInstr(env, X86Instr_FpCMov(cc ^ 1, r0, dst));
3281         return dst;
3282       }
3283    }
3284 
3285    ppIRExpr(e);
3286    vpanic("iselDblExpr_wrk");
3287 }
3288 
3289 
3290 /*---------------------------------------------------------*/
3291 /*--- ISEL: SIMD (Vector) expressions, 128 bit.         ---*/
3292 /*---------------------------------------------------------*/
3293 
iselVecExpr(ISelEnv * env,const IRExpr * e)3294 static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e )
3295 {
3296    HReg r = iselVecExpr_wrk( env, e );
3297 #  if 0
3298    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3299 #  endif
3300    vassert(hregClass(r) == HRcVec128);
3301    vassert(hregIsVirtual(r));
3302    return r;
3303 }
3304 
3305 
3306 /* DO NOT CALL THIS DIRECTLY */
iselVecExpr_wrk(ISelEnv * env,const IRExpr * e)3307 static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e )
3308 {
3309 
3310 #  define REQUIRE_SSE1                                    \
3311       do { if (env->hwcaps == 0/*baseline, no sse*/       \
3312                ||  env->hwcaps == VEX_HWCAPS_X86_MMXEXT /*Integer SSE*/) \
3313               goto vec_fail;                              \
3314       } while (0)
3315 
3316 #  define REQUIRE_SSE2                                    \
3317       do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2))  \
3318               goto vec_fail;                              \
3319       } while (0)
3320 
3321 #  define SSE2_OR_ABOVE                                   \
3322        (env->hwcaps & VEX_HWCAPS_X86_SSE2)
3323 
3324    HWord     fn = 0; /* address of helper fn, if required */
3325    MatchInfo mi;
3326    Bool      arg1isEReg = False;
3327    X86SseOp  op = Xsse_INVALID;
3328    IRType    ty = typeOfIRExpr(env->type_env,e);
3329    vassert(e);
3330    vassert(ty == Ity_V128);
3331 
3332    REQUIRE_SSE1;
3333 
3334    if (e->tag == Iex_RdTmp) {
3335       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3336    }
3337 
3338    if (e->tag == Iex_Get) {
3339       HReg dst = newVRegV(env);
3340       addInstr(env, X86Instr_SseLdSt(
3341                        True/*load*/,
3342                        dst,
3343                        X86AMode_IR(e->Iex.Get.offset, hregX86_EBP())
3344                     )
3345               );
3346       return dst;
3347    }
3348 
3349    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3350       HReg      dst = newVRegV(env);
3351       X86AMode* am  = iselIntExpr_AMode(env, e->Iex.Load.addr);
3352       addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
3353       return dst;
3354    }
3355 
3356    if (e->tag == Iex_Const) {
3357       HReg dst = newVRegV(env);
3358       vassert(e->Iex.Const.con->tag == Ico_V128);
3359       addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
3360       return dst;
3361    }
3362 
3363    if (e->tag == Iex_Unop) {
3364 
3365    if (SSE2_OR_ABOVE) {
3366       /* 64UtoV128(LDle:I64(addr)) */
3367       DECLARE_PATTERN(p_zwiden_load64);
3368       DEFINE_PATTERN(p_zwiden_load64,
3369                      unop(Iop_64UtoV128,
3370                           IRExpr_Load(Iend_LE,Ity_I64,bind(0))));
3371       if (matchIRExpr(&mi, p_zwiden_load64, e)) {
3372          X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]);
3373          HReg dst = newVRegV(env);
3374          addInstr(env, X86Instr_SseLdzLO(8, dst, am));
3375          return dst;
3376       }
3377    }
3378 
3379    switch (e->Iex.Unop.op) {
3380 
3381       case Iop_NotV128: {
3382          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3383          return do_sse_Not128(env, arg);
3384       }
3385 
3386       case Iop_CmpNEZ64x2: {
3387          /* We can use SSE2 instructions for this. */
3388          /* Ideally, we want to do a 64Ix2 comparison against zero of
3389             the operand.  Problem is no such insn exists.  Solution
3390             therefore is to do a 32Ix4 comparison instead, and bitwise-
3391             negate (NOT) the result.  Let a,b,c,d be 32-bit lanes, and
3392             let the not'd result of this initial comparison be a:b:c:d.
3393             What we need to compute is (a|b):(a|b):(c|d):(c|d).  So, use
3394             pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3395             giving the required result.
3396 
3397             The required selection sequence is 2,3,0,1, which
3398             according to Intel's documentation means the pshufd
3399             literal value is 0xB1, that is,
3400             (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3401          */
3402          HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
3403          HReg tmp  = newVRegV(env);
3404          HReg dst  = newVRegV(env);
3405          REQUIRE_SSE2;
3406          addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp));
3407          addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp));
3408          tmp = do_sse_Not128(env, tmp);
3409          addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst));
3410          addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
3411          return dst;
3412       }
3413 
3414       case Iop_CmpNEZ32x4: {
3415          /* Sigh, we have to generate lousy code since this has to
3416             work on SSE1 hosts */
3417          /* basically, the idea is: for each lane:
3418                movl lane, %r ; negl %r   (now CF = lane==0 ? 0 : 1)
3419                sbbl %r, %r               (now %r = 1Sto32(CF))
3420                movl %r, lane
3421          */
3422          Int       i;
3423          X86AMode* am;
3424          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3425          HReg      arg  = iselVecExpr(env, e->Iex.Unop.arg);
3426          HReg      dst  = newVRegV(env);
3427          HReg      r32  = newVRegI(env);
3428          sub_from_esp(env, 16);
3429          addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
3430          for (i = 0; i < 4; i++) {
3431             am = X86AMode_IR(i*4, hregX86_ESP());
3432             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
3433             addInstr(env, X86Instr_Unary32(Xun_NEG, r32));
3434             addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
3435             addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
3436          }
3437          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3438          add_to_esp(env, 16);
3439          return dst;
3440       }
3441 
3442       case Iop_CmpNEZ8x16:
3443       case Iop_CmpNEZ16x8: {
3444          /* We can use SSE2 instructions for this. */
3445          HReg arg;
3446          HReg vec0 = newVRegV(env);
3447          HReg vec1 = newVRegV(env);
3448          HReg dst  = newVRegV(env);
3449          X86SseOp cmpOp
3450             = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16
3451                                              : Xsse_CMPEQ8;
3452          REQUIRE_SSE2;
3453          addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0));
3454          addInstr(env, mk_vMOVsd_RR(vec0, vec1));
3455          addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1));
3456          /* defer arg computation to here so as to give CMPEQF as long
3457             as possible to complete */
3458          arg = iselVecExpr(env, e->Iex.Unop.arg);
3459          /* vec0 is all 0s; vec1 is all 1s */
3460          addInstr(env, mk_vMOVsd_RR(arg, dst));
3461          /* 16x8 or 8x16 comparison == */
3462          addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst));
3463          /* invert result */
3464          addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
3465          return dst;
3466       }
3467 
3468       case Iop_RecipEst32Fx4: op = Xsse_RCPF;   goto do_32Fx4_unary;
3469       case Iop_RSqrtEst32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
3470       do_32Fx4_unary:
3471       {
3472          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3473          HReg dst = newVRegV(env);
3474          addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
3475          return dst;
3476       }
3477 
3478       case Iop_RecipEst32F0x4: op = Xsse_RCPF;   goto do_32F0x4_unary;
3479       case Iop_RSqrtEst32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary;
3480       case Iop_Sqrt32F0x4:     op = Xsse_SQRTF;  goto do_32F0x4_unary;
3481       do_32F0x4_unary:
3482       {
3483          /* A bit subtle.  We have to copy the arg to the result
3484             register first, because actually doing the SSE scalar insn
3485             leaves the upper 3/4 of the destination register
3486             unchanged.  Whereas the required semantics of these
3487             primops is that the upper 3/4 is simply copied in from the
3488             argument. */
3489          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3490          HReg dst = newVRegV(env);
3491          addInstr(env, mk_vMOVsd_RR(arg, dst));
3492          addInstr(env, X86Instr_Sse32FLo(op, arg, dst));
3493          return dst;
3494       }
3495 
3496       case Iop_Sqrt64F0x2:  op = Xsse_SQRTF;  goto do_64F0x2_unary;
3497       do_64F0x2_unary:
3498       {
3499          /* A bit subtle.  We have to copy the arg to the result
3500             register first, because actually doing the SSE scalar insn
3501             leaves the upper half of the destination register
3502             unchanged.  Whereas the required semantics of these
3503             primops is that the upper half is simply copied in from the
3504             argument. */
3505          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3506          HReg dst = newVRegV(env);
3507          REQUIRE_SSE2;
3508          addInstr(env, mk_vMOVsd_RR(arg, dst));
3509          addInstr(env, X86Instr_Sse64FLo(op, arg, dst));
3510          return dst;
3511       }
3512 
3513       case Iop_32UtoV128: {
3514          HReg      dst  = newVRegV(env);
3515          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3516          X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3517          addInstr(env, X86Instr_Push(rmi));
3518 	 addInstr(env, X86Instr_SseLdzLO(4, dst, esp0));
3519          add_to_esp(env, 4);
3520          return dst;
3521       }
3522 
3523       case Iop_64UtoV128: {
3524          HReg      rHi, rLo;
3525          HReg      dst  = newVRegV(env);
3526          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3527          iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg);
3528          addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3529          addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3530 	 addInstr(env, X86Instr_SseLdzLO(8, dst, esp0));
3531          add_to_esp(env, 8);
3532          return dst;
3533       }
3534 
3535       default:
3536          break;
3537    } /* switch (e->Iex.Unop.op) */
3538    } /* if (e->tag == Iex_Unop) */
3539 
3540    if (e->tag == Iex_Binop) {
3541    switch (e->Iex.Binop.op) {
3542 
3543       case Iop_Sqrt64Fx2:
3544          REQUIRE_SSE2;
3545          /* fallthrough */
3546       case Iop_Sqrt32Fx4: {
3547          /* :: (rmode, vec) -> vec */
3548          HReg arg = iselVecExpr(env, e->Iex.Binop.arg2);
3549          HReg dst = newVRegV(env);
3550          /* XXXROUNDINGFIXME */
3551          /* set roundingmode here */
3552          addInstr(env, (e->Iex.Binop.op == Iop_Sqrt64Fx2
3553                            ? X86Instr_Sse64Fx2 : X86Instr_Sse32Fx4)
3554                        (Xsse_SQRTF, arg, dst));
3555          return dst;
3556       }
3557 
3558       case Iop_SetV128lo32: {
3559          HReg dst = newVRegV(env);
3560          HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3561          HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3562          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3563          sub_from_esp(env, 16);
3564          addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3565          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0));
3566          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3567          add_to_esp(env, 16);
3568          return dst;
3569       }
3570 
3571       case Iop_SetV128lo64: {
3572          HReg dst = newVRegV(env);
3573          HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3574          HReg srcIhi, srcIlo;
3575          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3576          X86AMode* esp4 = advance4(esp0);
3577          iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2);
3578          sub_from_esp(env, 16);
3579          addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3580          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0));
3581          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4));
3582          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3583          add_to_esp(env, 16);
3584          return dst;
3585       }
3586 
3587       case Iop_64HLtoV128: {
3588          HReg r3, r2, r1, r0;
3589          X86AMode* esp0  = X86AMode_IR(0, hregX86_ESP());
3590          X86AMode* esp4  = advance4(esp0);
3591          X86AMode* esp8  = advance4(esp4);
3592          X86AMode* esp12 = advance4(esp8);
3593          HReg dst = newVRegV(env);
3594 	 /* do this via the stack (easy, convenient, etc) */
3595          sub_from_esp(env, 16);
3596          /* Do the less significant 64 bits */
3597          iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
3598          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0));
3599          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4));
3600          /* Do the more significant 64 bits */
3601          iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
3602          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8));
3603          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12));
3604 	 /* Fetch result back from stack. */
3605          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3606          add_to_esp(env, 16);
3607          return dst;
3608       }
3609 
3610       case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
3611       case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
3612       case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
3613       case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4;
3614       case Iop_Max32Fx4:   op = Xsse_MAXF;   goto do_32Fx4;
3615       case Iop_Min32Fx4:   op = Xsse_MINF;   goto do_32Fx4;
3616       do_32Fx4:
3617       {
3618          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3619          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3620          HReg dst = newVRegV(env);
3621          addInstr(env, mk_vMOVsd_RR(argL, dst));
3622          addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
3623          return dst;
3624       }
3625 
3626       case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
3627       case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
3628       case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
3629       case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2;
3630       case Iop_Max64Fx2:   op = Xsse_MAXF;   goto do_64Fx2;
3631       case Iop_Min64Fx2:   op = Xsse_MINF;   goto do_64Fx2;
3632       do_64Fx2:
3633       {
3634          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3635          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3636          HReg dst = newVRegV(env);
3637          REQUIRE_SSE2;
3638          addInstr(env, mk_vMOVsd_RR(argL, dst));
3639          addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
3640          return dst;
3641       }
3642 
3643       case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
3644       case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4;
3645       case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
3646       case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4;
3647       case Iop_Add32F0x4:   op = Xsse_ADDF;   goto do_32F0x4;
3648       case Iop_Div32F0x4:   op = Xsse_DIVF;   goto do_32F0x4;
3649       case Iop_Max32F0x4:   op = Xsse_MAXF;   goto do_32F0x4;
3650       case Iop_Min32F0x4:   op = Xsse_MINF;   goto do_32F0x4;
3651       case Iop_Mul32F0x4:   op = Xsse_MULF;   goto do_32F0x4;
3652       case Iop_Sub32F0x4:   op = Xsse_SUBF;   goto do_32F0x4;
3653       do_32F0x4: {
3654          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3655          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3656          HReg dst = newVRegV(env);
3657          addInstr(env, mk_vMOVsd_RR(argL, dst));
3658          addInstr(env, X86Instr_Sse32FLo(op, argR, dst));
3659          return dst;
3660       }
3661 
3662       case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2;
3663       case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2;
3664       case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
3665       case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2;
3666       case Iop_Add64F0x2:   op = Xsse_ADDF;   goto do_64F0x2;
3667       case Iop_Div64F0x2:   op = Xsse_DIVF;   goto do_64F0x2;
3668       case Iop_Max64F0x2:   op = Xsse_MAXF;   goto do_64F0x2;
3669       case Iop_Min64F0x2:   op = Xsse_MINF;   goto do_64F0x2;
3670       case Iop_Mul64F0x2:   op = Xsse_MULF;   goto do_64F0x2;
3671       case Iop_Sub64F0x2:   op = Xsse_SUBF;   goto do_64F0x2;
3672       do_64F0x2: {
3673          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3674          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3675          HReg dst = newVRegV(env);
3676          REQUIRE_SSE2;
3677          addInstr(env, mk_vMOVsd_RR(argL, dst));
3678          addInstr(env, X86Instr_Sse64FLo(op, argR, dst));
3679          return dst;
3680       }
3681 
3682       case Iop_QNarrowBin32Sto16Sx8:
3683          op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
3684       case Iop_QNarrowBin16Sto8Sx16:
3685          op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
3686       case Iop_QNarrowBin16Sto8Ux16:
3687          op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
3688 
3689       case Iop_InterleaveHI8x16:
3690          op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
3691       case Iop_InterleaveHI16x8:
3692          op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
3693       case Iop_InterleaveHI32x4:
3694          op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
3695       case Iop_InterleaveHI64x2:
3696          op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
3697 
3698       case Iop_InterleaveLO8x16:
3699          op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
3700       case Iop_InterleaveLO16x8:
3701          op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
3702       case Iop_InterleaveLO32x4:
3703          op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
3704       case Iop_InterleaveLO64x2:
3705          op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
3706 
3707       case Iop_AndV128:    op = Xsse_AND;      goto do_SseReRg;
3708       case Iop_OrV128:     op = Xsse_OR;       goto do_SseReRg;
3709       case Iop_XorV128:    op = Xsse_XOR;      goto do_SseReRg;
3710       case Iop_Add8x16:    op = Xsse_ADD8;     goto do_SseReRg;
3711       case Iop_Add16x8:    op = Xsse_ADD16;    goto do_SseReRg;
3712       case Iop_Add32x4:    op = Xsse_ADD32;    goto do_SseReRg;
3713       case Iop_Add64x2:    op = Xsse_ADD64;    goto do_SseReRg;
3714       case Iop_QAdd8Sx16:  op = Xsse_QADD8S;   goto do_SseReRg;
3715       case Iop_QAdd16Sx8:  op = Xsse_QADD16S;  goto do_SseReRg;
3716       case Iop_QAdd8Ux16:  op = Xsse_QADD8U;   goto do_SseReRg;
3717       case Iop_QAdd16Ux8:  op = Xsse_QADD16U;  goto do_SseReRg;
3718       case Iop_Avg8Ux16:   op = Xsse_AVG8U;    goto do_SseReRg;
3719       case Iop_Avg16Ux8:   op = Xsse_AVG16U;   goto do_SseReRg;
3720       case Iop_CmpEQ8x16:  op = Xsse_CMPEQ8;   goto do_SseReRg;
3721       case Iop_CmpEQ16x8:  op = Xsse_CMPEQ16;  goto do_SseReRg;
3722       case Iop_CmpEQ32x4:  op = Xsse_CMPEQ32;  goto do_SseReRg;
3723       case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S;  goto do_SseReRg;
3724       case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
3725       case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
3726       case Iop_Max16Sx8:   op = Xsse_MAX16S;   goto do_SseReRg;
3727       case Iop_Max8Ux16:   op = Xsse_MAX8U;    goto do_SseReRg;
3728       case Iop_Min16Sx8:   op = Xsse_MIN16S;   goto do_SseReRg;
3729       case Iop_Min8Ux16:   op = Xsse_MIN8U;    goto do_SseReRg;
3730       case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
3731       case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
3732       case Iop_Mul16x8:    op = Xsse_MUL16;    goto do_SseReRg;
3733       case Iop_Sub8x16:    op = Xsse_SUB8;     goto do_SseReRg;
3734       case Iop_Sub16x8:    op = Xsse_SUB16;    goto do_SseReRg;
3735       case Iop_Sub32x4:    op = Xsse_SUB32;    goto do_SseReRg;
3736       case Iop_Sub64x2:    op = Xsse_SUB64;    goto do_SseReRg;
3737       case Iop_QSub8Sx16:  op = Xsse_QSUB8S;   goto do_SseReRg;
3738       case Iop_QSub16Sx8:  op = Xsse_QSUB16S;  goto do_SseReRg;
3739       case Iop_QSub8Ux16:  op = Xsse_QSUB8U;   goto do_SseReRg;
3740       case Iop_QSub16Ux8:  op = Xsse_QSUB16U;  goto do_SseReRg;
3741       do_SseReRg: {
3742          HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
3743          HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
3744          HReg dst = newVRegV(env);
3745          if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR)
3746             REQUIRE_SSE2;
3747          if (arg1isEReg) {
3748             addInstr(env, mk_vMOVsd_RR(arg2, dst));
3749             addInstr(env, X86Instr_SseReRg(op, arg1, dst));
3750          } else {
3751             addInstr(env, mk_vMOVsd_RR(arg1, dst));
3752             addInstr(env, X86Instr_SseReRg(op, arg2, dst));
3753          }
3754          return dst;
3755       }
3756 
3757       case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
3758       case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
3759       case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
3760       case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
3761       case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
3762       case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
3763       case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
3764       case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
3765       do_SseShift: {
3766          HReg      greg = iselVecExpr(env, e->Iex.Binop.arg1);
3767          X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3768          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3769          HReg      ereg = newVRegV(env);
3770          HReg      dst  = newVRegV(env);
3771          REQUIRE_SSE2;
3772          addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3773          addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3774          addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3775          addInstr(env, X86Instr_Push(rmi));
3776          addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
3777 	 addInstr(env, mk_vMOVsd_RR(greg, dst));
3778          addInstr(env, X86Instr_SseReRg(op, ereg, dst));
3779          add_to_esp(env, 16);
3780          return dst;
3781       }
3782 
3783       case Iop_NarrowBin32to16x8:
3784          fn = (HWord)h_generic_calc_NarrowBin32to16x8;
3785          goto do_SseAssistedBinary;
3786       case Iop_NarrowBin16to8x16:
3787          fn = (HWord)h_generic_calc_NarrowBin16to8x16;
3788          goto do_SseAssistedBinary;
3789       do_SseAssistedBinary: {
3790          /* As with the amd64 case (where this is copied from) we
3791             generate pretty bad code. */
3792          vassert(fn != 0);
3793          HReg dst = newVRegV(env);
3794          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3795          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3796          HReg argp = newVRegI(env);
3797          /* subl $112, %esp         -- make a space */
3798          sub_from_esp(env, 112);
3799          /* leal 48(%esp), %r_argp  -- point into it */
3800          addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
3801                                       argp));
3802          /* andl $-16, %r_argp      -- 16-align the pointer */
3803          addInstr(env, X86Instr_Alu32R(Xalu_AND,
3804                                        X86RMI_Imm( ~(UInt)15 ),
3805                                        argp));
3806          /* Prepare 3 arg regs:
3807             leal  0(%r_argp), %eax
3808             leal 16(%r_argp), %edx
3809             leal 32(%r_argp), %ecx
3810          */
3811          addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
3812                                       hregX86_EAX()));
3813          addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
3814                                       hregX86_EDX()));
3815          addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
3816                                       hregX86_ECX()));
3817          /* Store the two args, at (%edx) and (%ecx):
3818             movupd  %argL, 0(%edx)
3819             movupd  %argR, 0(%ecx)
3820          */
3821          addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
3822                                         X86AMode_IR(0, hregX86_EDX())));
3823          addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
3824                                         X86AMode_IR(0, hregX86_ECX())));
3825          /* call the helper */
3826          addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
3827                                       3, mk_RetLoc_simple(RLPri_None) ));
3828          /* fetch the result from memory, using %r_argp, which the
3829             register allocator will keep alive across the call. */
3830          addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
3831                                         X86AMode_IR(0, argp)));
3832          /* and finally, clear the space */
3833          add_to_esp(env, 112);
3834          return dst;
3835       }
3836 
3837       default:
3838          break;
3839    } /* switch (e->Iex.Binop.op) */
3840    } /* if (e->tag == Iex_Binop) */
3841 
3842 
3843    if (e->tag == Iex_Triop) {
3844    IRTriop *triop = e->Iex.Triop.details;
3845    switch (triop->op) {
3846 
3847       case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4_w_rm;
3848       case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4_w_rm;
3849       case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4_w_rm;
3850       case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4_w_rm;
3851       do_32Fx4_w_rm:
3852       {
3853          HReg argL = iselVecExpr(env, triop->arg2);
3854          HReg argR = iselVecExpr(env, triop->arg3);
3855          HReg dst = newVRegV(env);
3856          addInstr(env, mk_vMOVsd_RR(argL, dst));
3857          /* XXXROUNDINGFIXME */
3858          /* set roundingmode here */
3859          addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
3860          return dst;
3861       }
3862 
3863       case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2_w_rm;
3864       case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2_w_rm;
3865       case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2_w_rm;
3866       case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2_w_rm;
3867       do_64Fx2_w_rm:
3868       {
3869          HReg argL = iselVecExpr(env, triop->arg2);
3870          HReg argR = iselVecExpr(env, triop->arg3);
3871          HReg dst = newVRegV(env);
3872          REQUIRE_SSE2;
3873          addInstr(env, mk_vMOVsd_RR(argL, dst));
3874          /* XXXROUNDINGFIXME */
3875          /* set roundingmode here */
3876          addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
3877          return dst;
3878       }
3879 
3880       default:
3881          break;
3882    } /* switch (triop->op) */
3883    } /* if (e->tag == Iex_Triop) */
3884 
3885 
3886    if (e->tag == Iex_ITE) { // VFD
3887       HReg r1  = iselVecExpr(env, e->Iex.ITE.iftrue);
3888       HReg r0  = iselVecExpr(env, e->Iex.ITE.iffalse);
3889       HReg dst = newVRegV(env);
3890       addInstr(env, mk_vMOVsd_RR(r1,dst));
3891       X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
3892       addInstr(env, X86Instr_SseCMov(cc ^ 1, r0, dst));
3893       return dst;
3894    }
3895 
3896    vec_fail:
3897    vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
3898               LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
3899    ppIRExpr(e);
3900    vpanic("iselVecExpr_wrk");
3901 
3902 #  undef REQUIRE_SSE1
3903 #  undef REQUIRE_SSE2
3904 #  undef SSE2_OR_ABOVE
3905 }
3906 
3907 
3908 /*---------------------------------------------------------*/
3909 /*--- ISEL: Statements                                  ---*/
3910 /*---------------------------------------------------------*/
3911 
iselStmt(ISelEnv * env,IRStmt * stmt)3912 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3913 {
3914    if (vex_traceflags & VEX_TRACE_VCODE) {
3915       vex_printf("\n-- ");
3916       ppIRStmt(stmt);
3917       vex_printf("\n");
3918    }
3919 
3920    switch (stmt->tag) {
3921 
3922    /* --------- STORE --------- */
3923    case Ist_Store: {
3924       IRType    tya   = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3925       IRType    tyd   = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3926       IREndness end   = stmt->Ist.Store.end;
3927 
3928       if (tya != Ity_I32 || end != Iend_LE)
3929          goto stmt_fail;
3930 
3931       if (tyd == Ity_I32) {
3932          X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3933          X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
3934          addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
3935          return;
3936       }
3937       if (tyd == Ity_I8 || tyd == Ity_I16) {
3938          X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3939          HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
3940          addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
3941                                        r,am ));
3942          return;
3943       }
3944       if (tyd == Ity_F64) {
3945          X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3946          HReg r = iselDblExpr(env, stmt->Ist.Store.data);
3947          addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
3948          return;
3949       }
3950       if (tyd == Ity_F32) {
3951          X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3952          HReg r = iselFltExpr(env, stmt->Ist.Store.data);
3953          addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
3954          return;
3955       }
3956       if (tyd == Ity_I64) {
3957          HReg vHi, vLo, rA;
3958          iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
3959          rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
3960          addInstr(env, X86Instr_Alu32M(
3961                           Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
3962          addInstr(env, X86Instr_Alu32M(
3963                           Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
3964          return;
3965       }
3966       if (tyd == Ity_V128) {
3967          X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3968          HReg r = iselVecExpr(env, stmt->Ist.Store.data);
3969          addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
3970          return;
3971       }
3972       break;
3973    }
3974 
3975    /* --------- PUT --------- */
3976    case Ist_Put: {
3977       IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3978       if (ty == Ity_I32) {
3979          /* We're going to write to memory, so compute the RHS into an
3980             X86RI. */
3981          X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
3982          addInstr(env,
3983                   X86Instr_Alu32M(
3984                      Xalu_MOV,
3985                      ri,
3986                      X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
3987                  ));
3988          return;
3989       }
3990       if (ty == Ity_I8 || ty == Ity_I16) {
3991          HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
3992          addInstr(env, X86Instr_Store(
3993                           toUChar(ty==Ity_I8 ? 1 : 2),
3994                           r,
3995                           X86AMode_IR(stmt->Ist.Put.offset,
3996                                       hregX86_EBP())));
3997          return;
3998       }
3999       if (ty == Ity_I64) {
4000          HReg vHi, vLo;
4001          X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
4002          X86AMode* am4 = advance4(am);
4003          iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
4004          addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
4005          addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
4006          return;
4007       }
4008       if (ty == Ity_V128) {
4009          HReg      vec = iselVecExpr(env, stmt->Ist.Put.data);
4010          X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
4011          addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
4012          return;
4013       }
4014       if (ty == Ity_F32) {
4015          HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
4016          X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
4017          set_FPU_rounding_default(env); /* paranoia */
4018          addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
4019          return;
4020       }
4021       if (ty == Ity_F64) {
4022          HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
4023          X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
4024          set_FPU_rounding_default(env); /* paranoia */
4025          addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
4026          return;
4027       }
4028       break;
4029    }
4030 
4031    /* --------- Indexed PUT --------- */
4032    case Ist_PutI: {
4033       IRPutI *puti = stmt->Ist.PutI.details;
4034 
4035       X86AMode* am
4036          = genGuestArrayOffset(
4037               env, puti->descr,
4038                    puti->ix, puti->bias );
4039 
4040       IRType ty = typeOfIRExpr(env->type_env, puti->data);
4041       if (ty == Ity_F64) {
4042          HReg val = iselDblExpr(env, puti->data);
4043          addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
4044          return;
4045       }
4046       if (ty == Ity_I8) {
4047          HReg r = iselIntExpr_R(env, puti->data);
4048          addInstr(env, X86Instr_Store( 1, r, am ));
4049          return;
4050       }
4051       if (ty == Ity_I32) {
4052          HReg r = iselIntExpr_R(env, puti->data);
4053          addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
4054          return;
4055       }
4056       if (ty == Ity_I64) {
4057          HReg rHi, rLo;
4058          X86AMode* am4 = advance4(am);
4059          iselInt64Expr(&rHi, &rLo, env, puti->data);
4060          addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
4061          addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
4062          return;
4063       }
4064       break;
4065    }
4066 
4067    /* --------- TMP --------- */
4068    case Ist_WrTmp: {
4069       IRTemp tmp = stmt->Ist.WrTmp.tmp;
4070       IRType ty = typeOfIRTemp(env->type_env, tmp);
4071 
4072       /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
4073          compute it into an AMode and then use LEA.  This usually
4074          produces fewer instructions, often because (for memcheck
4075          created IR) we get t = address-expression, (t is later used
4076          twice) and so doing this naturally turns address-expression
4077          back into an X86 amode. */
4078       if (ty == Ity_I32
4079           && stmt->Ist.WrTmp.data->tag == Iex_Binop
4080           && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
4081          X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
4082          HReg dst = lookupIRTemp(env, tmp);
4083          if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
4084             /* Hmm, iselIntExpr_AMode wimped out and just computed the
4085                value into a register.  Just emit a normal reg-reg move
4086                so reg-alloc can coalesce it away in the usual way. */
4087             HReg src = am->Xam.IR.reg;
4088             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
4089          } else {
4090             addInstr(env, X86Instr_Lea32(am,dst));
4091          }
4092          return;
4093       }
4094 
4095       if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
4096          X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
4097          HReg dst = lookupIRTemp(env, tmp);
4098          addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
4099          return;
4100       }
4101       if (ty == Ity_I64) {
4102          HReg rHi, rLo, dstHi, dstLo;
4103          iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
4104          lookupIRTemp64( &dstHi, &dstLo, env, tmp);
4105          addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
4106          addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
4107          return;
4108       }
4109       if (ty == Ity_I1) {
4110          X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
4111          HReg dst = lookupIRTemp(env, tmp);
4112          addInstr(env, X86Instr_Set32(cond, dst));
4113          return;
4114       }
4115       if (ty == Ity_F64) {
4116          HReg dst = lookupIRTemp(env, tmp);
4117          HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
4118          addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
4119          return;
4120       }
4121       if (ty == Ity_F32) {
4122          HReg dst = lookupIRTemp(env, tmp);
4123          HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
4124          addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
4125          return;
4126       }
4127       if (ty == Ity_V128) {
4128          HReg dst = lookupIRTemp(env, tmp);
4129          HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
4130          addInstr(env, mk_vMOVsd_RR(src,dst));
4131          return;
4132       }
4133       break;
4134    }
4135 
4136    /* --------- Call to DIRTY helper --------- */
4137    case Ist_Dirty: {
4138       IRDirty* d = stmt->Ist.Dirty.details;
4139 
4140       /* Figure out the return type, if any. */
4141       IRType retty = Ity_INVALID;
4142       if (d->tmp != IRTemp_INVALID)
4143          retty = typeOfIRTemp(env->type_env, d->tmp);
4144 
4145       Bool retty_ok = False;
4146       switch (retty) {
4147          case Ity_INVALID: /* function doesn't return anything */
4148          case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
4149          case Ity_V128:
4150             retty_ok = True; break;
4151          default:
4152             break;
4153       }
4154       if (!retty_ok)
4155          break; /* will go to stmt_fail: */
4156 
4157       /* Marshal args, do the call, and set the return value to
4158          0x555..555 if this is a conditional call that returns a value
4159          and the call is skipped. */
4160       UInt   addToSp = 0;
4161       RetLoc rloc    = mk_RetLoc_INVALID();
4162       doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
4163       vassert(is_sane_RetLoc(rloc));
4164 
4165       /* Now figure out what to do with the returned value, if any. */
4166       switch (retty) {
4167          case Ity_INVALID: {
4168             /* No return value.  Nothing to do. */
4169             vassert(d->tmp == IRTemp_INVALID);
4170             vassert(rloc.pri == RLPri_None);
4171             vassert(addToSp == 0);
4172             return;
4173          }
4174          case Ity_I32: case Ity_I16: case Ity_I8: {
4175             /* The returned value is in %eax.  Park it in the register
4176                associated with tmp. */
4177             vassert(rloc.pri == RLPri_Int);
4178             vassert(addToSp == 0);
4179             HReg dst = lookupIRTemp(env, d->tmp);
4180             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
4181             return;
4182          }
4183          case Ity_I64: {
4184             /* The returned value is in %edx:%eax.  Park it in the
4185                register-pair associated with tmp. */
4186             vassert(rloc.pri == RLPri_2Int);
4187             vassert(addToSp == 0);
4188             HReg dstHi, dstLo;
4189             lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
4190             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
4191             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
4192             return;
4193          }
4194          case Ity_V128: {
4195             /* The returned value is on the stack, and *retloc tells
4196                us where.  Fish it off the stack and then move the
4197                stack pointer upwards to clear it, as directed by
4198                doHelperCall. */
4199             vassert(rloc.pri == RLPri_V128SpRel);
4200             vassert(addToSp >= 16);
4201             HReg      dst = lookupIRTemp(env, d->tmp);
4202             X86AMode* am  = X86AMode_IR(rloc.spOff, hregX86_ESP());
4203             addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
4204             add_to_esp(env, addToSp);
4205             return;
4206          }
4207          default:
4208             /*NOTREACHED*/
4209             vassert(0);
4210       }
4211       break;
4212    }
4213 
4214    /* --------- MEM FENCE --------- */
4215    case Ist_MBE:
4216       switch (stmt->Ist.MBE.event) {
4217          case Imbe_Fence:
4218             addInstr(env, X86Instr_MFence(env->hwcaps));
4219             return;
4220          default:
4221             break;
4222       }
4223       break;
4224 
4225    /* --------- ACAS --------- */
4226    case Ist_CAS:
4227       if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
4228          /* "normal" singleton CAS */
4229          UChar  sz;
4230          IRCAS* cas = stmt->Ist.CAS.details;
4231          IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
4232          /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
4233          X86AMode* am = iselIntExpr_AMode(env, cas->addr);
4234          HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
4235          HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
4236          HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
4237          vassert(cas->expdHi == NULL);
4238          vassert(cas->dataHi == NULL);
4239          addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
4240          addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
4241          addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
4242          switch (ty) {
4243             case Ity_I32: sz = 4; break;
4244             case Ity_I16: sz = 2; break;
4245             case Ity_I8:  sz = 1; break;
4246             default: goto unhandled_cas;
4247          }
4248          addInstr(env, X86Instr_ACAS(am, sz));
4249          addInstr(env,
4250                   X86Instr_CMov32(Xcc_NZ,
4251                                   X86RM_Reg(hregX86_EAX()), rOldLo));
4252          return;
4253       } else {
4254          /* double CAS */
4255          IRCAS* cas = stmt->Ist.CAS.details;
4256          IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
4257          /* only 32-bit allowed in this case */
4258          /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
4259          /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
4260          X86AMode* am = iselIntExpr_AMode(env, cas->addr);
4261          HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
4262          HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
4263          HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
4264          HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
4265          HReg rOldHi  = lookupIRTemp(env, cas->oldHi);
4266          HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
4267          if (ty != Ity_I32)
4268             goto unhandled_cas;
4269          addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
4270          addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
4271          addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
4272          addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
4273          addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
4274          addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
4275          addInstr(env, X86Instr_DACAS(am));
4276          addInstr(env,
4277                   X86Instr_CMov32(Xcc_NZ,
4278                                   X86RM_Reg(hregX86_EDX()), rOldHi));
4279          addInstr(env,
4280                   X86Instr_CMov32(Xcc_NZ,
4281                                   X86RM_Reg(hregX86_EAX()), rOldLo));
4282          return;
4283       }
4284       unhandled_cas:
4285       break;
4286 
4287    /* --------- INSTR MARK --------- */
4288    /* Doesn't generate any executable code ... */
4289    case Ist_IMark:
4290        return;
4291 
4292    /* --------- NO-OP --------- */
4293    /* Fairly self-explanatory, wouldn't you say? */
4294    case Ist_NoOp:
4295        return;
4296 
4297    /* --------- EXIT --------- */
4298    case Ist_Exit: {
4299       if (stmt->Ist.Exit.dst->tag != Ico_U32)
4300          vpanic("iselStmt(x86): Ist_Exit: dst is not a 32-bit value");
4301 
4302       X86CondCode cc    = iselCondCode(env, stmt->Ist.Exit.guard);
4303       X86AMode*   amEIP = X86AMode_IR(stmt->Ist.Exit.offsIP,
4304                                       hregX86_EBP());
4305 
4306       /* Case: boring transfer to known address */
4307       if (stmt->Ist.Exit.jk == Ijk_Boring) {
4308          if (env->chainingAllowed) {
4309             /* .. almost always true .. */
4310             /* Skip the event check at the dst if this is a forwards
4311                edge. */
4312             Bool toFastEP
4313                = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
4314             if (0) vex_printf("%s", toFastEP ? "Y" : ",");
4315             addInstr(env, X86Instr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
4316                                            amEIP, cc, toFastEP));
4317          } else {
4318             /* .. very occasionally .. */
4319             /* We can't use chaining, so ask for an assisted transfer,
4320                as that's the only alternative that is allowable. */
4321             HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4322             addInstr(env, X86Instr_XAssisted(r, amEIP, cc, Ijk_Boring));
4323          }
4324          return;
4325       }
4326 
4327       /* Case: assisted transfer to arbitrary address */
4328       switch (stmt->Ist.Exit.jk) {
4329          /* Keep this list in sync with that in iselNext below */
4330          case Ijk_ClientReq:
4331          case Ijk_EmWarn:
4332          case Ijk_MapFail:
4333          case Ijk_NoDecode:
4334          case Ijk_NoRedir:
4335          case Ijk_SigSEGV:
4336          case Ijk_SigTRAP:
4337          case Ijk_Sys_int128:
4338          case Ijk_Sys_int129:
4339          case Ijk_Sys_int130:
4340          case Ijk_Sys_int145:
4341          case Ijk_Sys_int210:
4342          case Ijk_Sys_syscall:
4343          case Ijk_Sys_sysenter:
4344          case Ijk_InvalICache:
4345          case Ijk_Yield:
4346          {
4347             HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4348             addInstr(env, X86Instr_XAssisted(r, amEIP, cc, stmt->Ist.Exit.jk));
4349             return;
4350          }
4351          default:
4352             break;
4353       }
4354 
4355       /* Do we ever expect to see any other kind? */
4356       goto stmt_fail;
4357    }
4358 
4359    default: break;
4360    }
4361   stmt_fail:
4362    ppIRStmt(stmt);
4363    vpanic("iselStmt");
4364 }
4365 
4366 
4367 /*---------------------------------------------------------*/
4368 /*--- ISEL: Basic block terminators (Nexts)             ---*/
4369 /*---------------------------------------------------------*/
4370 
iselNext(ISelEnv * env,IRExpr * next,IRJumpKind jk,Int offsIP)4371 static void iselNext ( ISelEnv* env,
4372                        IRExpr* next, IRJumpKind jk, Int offsIP )
4373 {
4374    if (vex_traceflags & VEX_TRACE_VCODE) {
4375       vex_printf( "\n-- PUT(%d) = ", offsIP);
4376       ppIRExpr( next );
4377       vex_printf( "; exit-");
4378       ppIRJumpKind(jk);
4379       vex_printf( "\n");
4380    }
4381 
4382    /* Case: boring transfer to known address */
4383    if (next->tag == Iex_Const) {
4384       IRConst* cdst = next->Iex.Const.con;
4385       vassert(cdst->tag == Ico_U32);
4386       if (jk == Ijk_Boring || jk == Ijk_Call) {
4387          /* Boring transfer to known address */
4388          X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4389          if (env->chainingAllowed) {
4390             /* .. almost always true .. */
4391             /* Skip the event check at the dst if this is a forwards
4392                edge. */
4393             Bool toFastEP
4394                = ((Addr32)cdst->Ico.U32) > env->max_ga;
4395             if (0) vex_printf("%s", toFastEP ? "X" : ".");
4396             addInstr(env, X86Instr_XDirect(cdst->Ico.U32,
4397                                            amEIP, Xcc_ALWAYS,
4398                                            toFastEP));
4399          } else {
4400             /* .. very occasionally .. */
4401             /* We can't use chaining, so ask for an assisted transfer,
4402                as that's the only alternative that is allowable. */
4403             HReg r = iselIntExpr_R(env, next);
4404             addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
4405                                              Ijk_Boring));
4406          }
4407          return;
4408       }
4409    }
4410 
4411    /* Case: call/return (==boring) transfer to any address */
4412    switch (jk) {
4413       case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
4414          HReg      r     = iselIntExpr_R(env, next);
4415          X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4416          if (env->chainingAllowed) {
4417             addInstr(env, X86Instr_XIndir(r, amEIP, Xcc_ALWAYS));
4418          } else {
4419             addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
4420                                                Ijk_Boring));
4421          }
4422          return;
4423       }
4424       default:
4425          break;
4426    }
4427 
4428    /* Case: assisted transfer to arbitrary address */
4429    switch (jk) {
4430       /* Keep this list in sync with that for Ist_Exit above */
4431       case Ijk_ClientReq:
4432       case Ijk_EmWarn:
4433       case Ijk_MapFail:
4434       case Ijk_NoDecode:
4435       case Ijk_NoRedir:
4436       case Ijk_SigSEGV:
4437       case Ijk_SigTRAP:
4438       case Ijk_Sys_int128:
4439       case Ijk_Sys_int129:
4440       case Ijk_Sys_int130:
4441       case Ijk_Sys_int145:
4442       case Ijk_Sys_int210:
4443       case Ijk_Sys_syscall:
4444       case Ijk_Sys_sysenter:
4445       case Ijk_InvalICache:
4446       case Ijk_Yield:
4447       {
4448          HReg      r     = iselIntExpr_R(env, next);
4449          X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4450          addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, jk));
4451          return;
4452       }
4453       default:
4454          break;
4455    }
4456 
4457    vex_printf( "\n-- PUT(%d) = ", offsIP);
4458    ppIRExpr( next );
4459    vex_printf( "; exit-");
4460    ppIRJumpKind(jk);
4461    vex_printf( "\n");
4462    vassert(0); // are we expecting any other kind?
4463 }
4464 
4465 
4466 /*---------------------------------------------------------*/
4467 /*--- Insn selector top-level                           ---*/
4468 /*---------------------------------------------------------*/
4469 
4470 /* Translate an entire SB to x86 code. */
4471 
iselSB_X86(const IRSB * bb,VexArch arch_host,const VexArchInfo * archinfo_host,const VexAbiInfo * vbi,Int offs_Host_EvC_Counter,Int offs_Host_EvC_FailAddr,Bool chainingAllowed,Bool addProfInc,Addr max_ga)4472 HInstrArray* iselSB_X86 ( const IRSB* bb,
4473                           VexArch      arch_host,
4474                           const VexArchInfo* archinfo_host,
4475                           const VexAbiInfo*  vbi/*UNUSED*/,
4476                           Int offs_Host_EvC_Counter,
4477                           Int offs_Host_EvC_FailAddr,
4478                           Bool chainingAllowed,
4479                           Bool addProfInc,
4480                           Addr max_ga )
4481 {
4482    Int      i, j;
4483    HReg     hreg, hregHI;
4484    ISelEnv* env;
4485    UInt     hwcaps_host = archinfo_host->hwcaps;
4486    X86AMode *amCounter, *amFailAddr;
4487 
4488    /* sanity ... */
4489    vassert(arch_host == VexArchX86);
4490    vassert(0 == (hwcaps_host
4491                  & ~(VEX_HWCAPS_X86_MMXEXT
4492                      | VEX_HWCAPS_X86_SSE1
4493                      | VEX_HWCAPS_X86_SSE2
4494                      | VEX_HWCAPS_X86_SSE3
4495                      | VEX_HWCAPS_X86_LZCNT)));
4496 
4497    /* Check that the host's endianness is as expected. */
4498    vassert(archinfo_host->endness == VexEndnessLE);
4499 
4500    /* Make up an initial environment to use. */
4501    env = LibVEX_Alloc_inline(sizeof(ISelEnv));
4502    env->vreg_ctr = 0;
4503 
4504    /* Set up output code array. */
4505    env->code = newHInstrArray();
4506 
4507    /* Copy BB's type env. */
4508    env->type_env = bb->tyenv;
4509 
4510    /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
4511       change as we go along. */
4512    env->n_vregmap = bb->tyenv->types_used;
4513    env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4514    env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4515 
4516    /* and finally ... */
4517    env->chainingAllowed = chainingAllowed;
4518    env->hwcaps          = hwcaps_host;
4519    env->max_ga          = max_ga;
4520 
4521    /* For each IR temporary, allocate a suitably-kinded virtual
4522       register. */
4523    j = 0;
4524    for (i = 0; i < env->n_vregmap; i++) {
4525       hregHI = hreg = INVALID_HREG;
4526       switch (bb->tyenv->types[i]) {
4527          case Ity_I1:
4528          case Ity_I8:
4529          case Ity_I16:
4530          case Ity_I32:  hreg   = mkHReg(True, HRcInt32,  0, j++); break;
4531          case Ity_I64:  hreg   = mkHReg(True, HRcInt32,  0, j++);
4532                         hregHI = mkHReg(True, HRcInt32,  0, j++); break;
4533          case Ity_F32:
4534          case Ity_F64:  hreg   = mkHReg(True, HRcFlt64,  0, j++); break;
4535          case Ity_V128: hreg   = mkHReg(True, HRcVec128, 0, j++); break;
4536          default: ppIRType(bb->tyenv->types[i]);
4537                   vpanic("iselBB: IRTemp type");
4538       }
4539       env->vregmap[i]   = hreg;
4540       env->vregmapHI[i] = hregHI;
4541    }
4542    env->vreg_ctr = j;
4543 
4544    /* The very first instruction must be an event check. */
4545    amCounter  = X86AMode_IR(offs_Host_EvC_Counter,  hregX86_EBP());
4546    amFailAddr = X86AMode_IR(offs_Host_EvC_FailAddr, hregX86_EBP());
4547    addInstr(env, X86Instr_EvCheck(amCounter, amFailAddr));
4548 
4549    /* Possibly a block counter increment (for profiling).  At this
4550       point we don't know the address of the counter, so just pretend
4551       it is zero.  It will have to be patched later, but before this
4552       translation is used, by a call to LibVEX_patchProfCtr. */
4553    if (addProfInc) {
4554       addInstr(env, X86Instr_ProfInc());
4555    }
4556 
4557    /* Ok, finally we can iterate over the statements. */
4558    for (i = 0; i < bb->stmts_used; i++)
4559       iselStmt(env, bb->stmts[i]);
4560 
4561    iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
4562 
4563    /* record the number of vregs we used. */
4564    env->code->n_vregs = env->vreg_ctr;
4565    return env->code;
4566 }
4567 
4568 
4569 /*---------------------------------------------------------------*/
4570 /*--- end                                     host_x86_isel.c ---*/
4571 /*---------------------------------------------------------------*/
4572