1 
2 /*---------------------------------------------------------------*/
3 /*--- begin                                   host_x86_isel.c ---*/
4 /*---------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2015 OpenWorks LLP
11       info@open-works.net
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26    02110-1301, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 
30    Neither the names of the U.S. Department of Energy nor the
31    University of California nor the names of its contributors may be
32    used to endorse or promote products derived from this software
33    without prior written permission.
34 */
35 
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
38 #include "libvex.h"
39 
40 #include "ir_match.h"
41 #include "main_util.h"
42 #include "main_globals.h"
43 #include "host_generic_regs.h"
44 #include "host_generic_simd64.h"
45 #include "host_generic_simd128.h"
46 #include "host_x86_defs.h"
47 
48 /* TODO 21 Apr 2005:
49 
50    -- (Really an assembler issue) don't emit CMov32 as a cmov
51       insn, since that's expensive on P4 and conditional branch
52       is cheaper if (as we expect) the condition is highly predictable
53 
54    -- preserve xmm registers across function calls (by declaring them
55       as trashed by call insns)
56 
57    -- preserve x87 ST stack discipline across function calls.  Sigh.
58 
59    -- Check doHelperCall: if a call is conditional, we cannot safely
60       compute any regparm args directly to registers.  Hence, the
61       fast-regparm marshalling should be restricted to unconditional
62       calls only.
63 */
64 
65 /*---------------------------------------------------------*/
66 /*--- x87 control word stuff                            ---*/
67 /*---------------------------------------------------------*/
68 
69 /* Vex-generated code expects to run with the FPU set as follows: all
70    exceptions masked, round-to-nearest, precision = 53 bits.  This
71    corresponds to a FPU control word value of 0x027F.
72 
73    Similarly the SSE control word (%mxcsr) should be 0x1F80.
74 
75    %fpucw and %mxcsr should have these values on entry to
76    Vex-generated code, and should those values should be
77    unchanged at exit.
78 */
79 
80 #define DEFAULT_FPUCW 0x027F
81 
82 /* debugging only, do not use */
83 /* define DEFAULT_FPUCW 0x037F */
84 
85 
86 /*---------------------------------------------------------*/
87 /*--- misc helpers                                      ---*/
88 /*---------------------------------------------------------*/
89 
90 /* These are duplicated in guest-x86/toIR.c */
unop(IROp op,IRExpr * a)91 static IRExpr* unop ( IROp op, IRExpr* a )
92 {
93    return IRExpr_Unop(op, a);
94 }
95 
binop(IROp op,IRExpr * a1,IRExpr * a2)96 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
97 {
98    return IRExpr_Binop(op, a1, a2);
99 }
100 
bind(Int binder)101 static IRExpr* bind ( Int binder )
102 {
103    return IRExpr_Binder(binder);
104 }
105 
isZeroU8(IRExpr * e)106 static Bool isZeroU8 ( IRExpr* e )
107 {
108    return e->tag == Iex_Const
109           && e->Iex.Const.con->tag == Ico_U8
110           && e->Iex.Const.con->Ico.U8 == 0;
111 }
112 
isZeroU32(IRExpr * e)113 static Bool isZeroU32 ( IRExpr* e )
114 {
115    return e->tag == Iex_Const
116           && e->Iex.Const.con->tag == Ico_U32
117           && e->Iex.Const.con->Ico.U32 == 0;
118 }
119 
120 //static Bool isZeroU64 ( IRExpr* e )
121 //{
122 //   return e->tag == Iex_Const
123 //          && e->Iex.Const.con->tag == Ico_U64
124 //          && e->Iex.Const.con->Ico.U64 == 0ULL;
125 //}
126 
127 
128 /*---------------------------------------------------------*/
129 /*--- ISelEnv                                           ---*/
130 /*---------------------------------------------------------*/
131 
132 /* This carries around:
133 
134    - A mapping from IRTemp to IRType, giving the type of any IRTemp we
135      might encounter.  This is computed before insn selection starts,
136      and does not change.
137 
138    - A mapping from IRTemp to HReg.  This tells the insn selector
139      which virtual register(s) are associated with each IRTemp
140      temporary.  This is computed before insn selection starts, and
141      does not change.  We expect this mapping to map precisely the
142      same set of IRTemps as the type mapping does.
143 
144         - vregmap   holds the primary register for the IRTemp.
145         - vregmapHI is only used for 64-bit integer-typed
146              IRTemps.  It holds the identity of a second
147              32-bit virtual HReg, which holds the high half
148              of the value.
149 
150    - The code array, that is, the insns selected so far.
151 
152    - A counter, for generating new virtual registers.
153 
154    - The host subarchitecture we are selecting insns for.
155      This is set at the start and does not change.
156 
157    - A Bool for indicating whether we may generate chain-me
158      instructions for control flow transfers, or whether we must use
159      XAssisted.
160 
161    - The maximum guest address of any guest insn in this block.
162      Actually, the address of the highest-addressed byte from any insn
163      in this block.  Is set at the start and does not change.  This is
164      used for detecting jumps which are definitely forward-edges from
165      this block, and therefore can be made (chained) to the fast entry
166      point of the destination, thereby avoiding the destination's
167      event check.
168 
169    Note, this is all (well, mostly) host-independent.
170 */
171 
172 typedef
173    struct {
174       /* Constant -- are set at the start and do not change. */
175       IRTypeEnv*   type_env;
176 
177       HReg*        vregmap;
178       HReg*        vregmapHI;
179       Int          n_vregmap;
180 
181       UInt         hwcaps;
182 
183       Bool         chainingAllowed;
184       Addr32       max_ga;
185 
186       /* These are modified as we go along. */
187       HInstrArray* code;
188       Int          vreg_ctr;
189    }
190    ISelEnv;
191 
192 
lookupIRTemp(ISelEnv * env,IRTemp tmp)193 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
194 {
195    vassert(tmp >= 0);
196    vassert(tmp < env->n_vregmap);
197    return env->vregmap[tmp];
198 }
199 
lookupIRTemp64(HReg * vrHI,HReg * vrLO,ISelEnv * env,IRTemp tmp)200 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
201 {
202    vassert(tmp >= 0);
203    vassert(tmp < env->n_vregmap);
204    vassert(! hregIsInvalid(env->vregmapHI[tmp]));
205    *vrLO = env->vregmap[tmp];
206    *vrHI = env->vregmapHI[tmp];
207 }
208 
addInstr(ISelEnv * env,X86Instr * instr)209 static void addInstr ( ISelEnv* env, X86Instr* instr )
210 {
211    addHInstr(env->code, instr);
212    if (vex_traceflags & VEX_TRACE_VCODE) {
213       ppX86Instr(instr, False);
214       vex_printf("\n");
215    }
216 }
217 
newVRegI(ISelEnv * env)218 static HReg newVRegI ( ISelEnv* env )
219 {
220    HReg reg = mkHReg(True/*virtual reg*/, HRcInt32, 0/*enc*/, env->vreg_ctr);
221    env->vreg_ctr++;
222    return reg;
223 }
224 
newVRegF(ISelEnv * env)225 static HReg newVRegF ( ISelEnv* env )
226 {
227    HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
228    env->vreg_ctr++;
229    return reg;
230 }
231 
newVRegV(ISelEnv * env)232 static HReg newVRegV ( ISelEnv* env )
233 {
234    HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
235    env->vreg_ctr++;
236    return reg;
237 }
238 
239 
240 /*---------------------------------------------------------*/
241 /*--- ISEL: Forward declarations                        ---*/
242 /*---------------------------------------------------------*/
243 
244 /* These are organised as iselXXX and iselXXX_wrk pairs.  The
245    iselXXX_wrk do the real work, but are not to be called directly.
246    For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
247    checks that all returned registers are virtual.  You should not
248    call the _wrk version directly.
249 */
250 static X86RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e );
251 static X86RMI*     iselIntExpr_RMI     ( ISelEnv* env, const IRExpr* e );
252 
253 static X86RI*      iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e );
254 static X86RI*      iselIntExpr_RI     ( ISelEnv* env, const IRExpr* e );
255 
256 static X86RM*      iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e );
257 static X86RM*      iselIntExpr_RM     ( ISelEnv* env, const IRExpr* e );
258 
259 static HReg        iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e );
260 static HReg        iselIntExpr_R     ( ISelEnv* env, const IRExpr* e );
261 
262 static X86AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e );
263 static X86AMode*   iselIntExpr_AMode     ( ISelEnv* env, const IRExpr* e );
264 
265 static void        iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
266                                        ISelEnv* env, const IRExpr* e );
267 static void        iselInt64Expr     ( HReg* rHi, HReg* rLo,
268                                        ISelEnv* env, const IRExpr* e );
269 
270 static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e );
271 static X86CondCode iselCondCode     ( ISelEnv* env, const IRExpr* e );
272 
273 static HReg        iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e );
274 static HReg        iselDblExpr     ( ISelEnv* env, const IRExpr* e );
275 
276 static HReg        iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e );
277 static HReg        iselFltExpr     ( ISelEnv* env, const IRExpr* e );
278 
279 static HReg        iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e );
280 static HReg        iselVecExpr     ( ISelEnv* env, const IRExpr* e );
281 
282 
283 /*---------------------------------------------------------*/
284 /*--- ISEL: Misc helpers                                ---*/
285 /*---------------------------------------------------------*/
286 
287 /* Make a int reg-reg move. */
288 
mk_iMOVsd_RR(HReg src,HReg dst)289 static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
290 {
291    vassert(hregClass(src) == HRcInt32);
292    vassert(hregClass(dst) == HRcInt32);
293    return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst);
294 }
295 
296 
297 /* Make a vector reg-reg move. */
298 
mk_vMOVsd_RR(HReg src,HReg dst)299 static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
300 {
301    vassert(hregClass(src) == HRcVec128);
302    vassert(hregClass(dst) == HRcVec128);
303    return X86Instr_SseReRg(Xsse_MOV, src, dst);
304 }
305 
306 /* Advance/retreat %esp by n. */
307 
add_to_esp(ISelEnv * env,Int n)308 static void add_to_esp ( ISelEnv* env, Int n )
309 {
310    vassert(n > 0 && n < 256 && (n%4) == 0);
311    addInstr(env,
312             X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP()));
313 }
314 
sub_from_esp(ISelEnv * env,Int n)315 static void sub_from_esp ( ISelEnv* env, Int n )
316 {
317    vassert(n > 0 && n < 256 && (n%4) == 0);
318    addInstr(env,
319             X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP()));
320 }
321 
322 
323 /* Given an amode, return one which references 4 bytes further
324    along. */
325 
advance4(X86AMode * am)326 static X86AMode* advance4 ( X86AMode* am )
327 {
328    X86AMode* am4 = dopyX86AMode(am);
329    switch (am4->tag) {
330       case Xam_IRRS:
331          am4->Xam.IRRS.imm += 4; break;
332       case Xam_IR:
333          am4->Xam.IR.imm += 4; break;
334       default:
335          vpanic("advance4(x86,host)");
336    }
337    return am4;
338 }
339 
340 
341 /* Push an arg onto the host stack, in preparation for a call to a
342    helper function of some kind.  Returns the number of 32-bit words
343    pushed.  If we encounter an IRExpr_VECRET() then we expect that
344    r_vecRetAddr will be a valid register, that holds the relevant
345    address.
346 */
pushArg(ISelEnv * env,IRExpr * arg,HReg r_vecRetAddr)347 static Int pushArg ( ISelEnv* env, IRExpr* arg, HReg r_vecRetAddr )
348 {
349    if (UNLIKELY(arg->tag == Iex_VECRET)) {
350       vassert(0); //ATC
351       vassert(!hregIsInvalid(r_vecRetAddr));
352       addInstr(env, X86Instr_Push(X86RMI_Reg(r_vecRetAddr)));
353       return 1;
354    }
355    if (UNLIKELY(arg->tag == Iex_GSPTR)) {
356       addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
357       return 1;
358    }
359    /* Else it's a "normal" expression. */
360    IRType arg_ty = typeOfIRExpr(env->type_env, arg);
361    if (arg_ty == Ity_I32) {
362       addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
363       return 1;
364    } else
365    if (arg_ty == Ity_I64) {
366       HReg rHi, rLo;
367       iselInt64Expr(&rHi, &rLo, env, arg);
368       addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
369       addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
370       return 2;
371    }
372    ppIRExpr(arg);
373    vpanic("pushArg(x86): can't handle arg of this type");
374 }
375 
376 
377 /* Complete the call to a helper function, by calling the
378    helper and clearing the args off the stack. */
379 
380 static
callHelperAndClearArgs(ISelEnv * env,X86CondCode cc,IRCallee * cee,Int n_arg_ws,RetLoc rloc)381 void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc,
382                               IRCallee* cee, Int n_arg_ws,
383                               RetLoc rloc )
384 {
385    /* Complication.  Need to decide which reg to use as the fn address
386       pointer, in a way that doesn't trash regparm-passed
387       parameters. */
388    vassert(sizeof(void*) == 4);
389 
390    addInstr(env, X86Instr_Call( cc, (Addr)cee->addr,
391                                 cee->regparms, rloc));
392    if (n_arg_ws > 0)
393       add_to_esp(env, 4*n_arg_ws);
394 }
395 
396 
397 /* Used only in doHelperCall.  See big comment in doHelperCall re
398    handling of regparm args.  This function figures out whether
399    evaluation of an expression might require use of a fixed register.
400    If in doubt return True (safe but suboptimal).
401 */
402 static
mightRequireFixedRegs(IRExpr * e)403 Bool mightRequireFixedRegs ( IRExpr* e )
404 {
405    if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
406       // These are always "safe" -- either a copy of %esp in some
407       // arbitrary vreg, or a copy of %ebp, respectively.
408       return False;
409    }
410    /* Else it's a "normal" expression. */
411    switch (e->tag) {
412       case Iex_RdTmp: case Iex_Const: case Iex_Get:
413          return False;
414       default:
415          return True;
416    }
417 }
418 
419 
420 /* Do a complete function call.  |guard| is a Ity_Bit expression
421    indicating whether or not the call happens.  If guard==NULL, the
422    call is unconditional.  |retloc| is set to indicate where the
423    return value is after the call.  The caller (of this fn) must
424    generate code to add |stackAdjustAfterCall| to the stack pointer
425    after the call is done. */
426 
427 static
doHelperCall(UInt * stackAdjustAfterCall,RetLoc * retloc,ISelEnv * env,IRExpr * guard,IRCallee * cee,IRType retTy,IRExpr ** args)428 void doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
429                     /*OUT*/RetLoc* retloc,
430                     ISelEnv* env,
431                     IRExpr* guard,
432                     IRCallee* cee, IRType retTy, IRExpr** args )
433 {
434    X86CondCode cc;
435    HReg        argregs[3];
436    HReg        tmpregs[3];
437    Bool        danger;
438    Int         not_done_yet, n_args, n_arg_ws, stack_limit,
439                i, argreg, argregX;
440 
441    /* Set default returns.  We'll update them later if needed. */
442    *stackAdjustAfterCall = 0;
443    *retloc               = mk_RetLoc_INVALID();
444 
445    /* These are used for cross-checking that IR-level constraints on
446       the use of Iex_VECRET and Iex_GSPTR are observed. */
447    UInt nVECRETs = 0;
448    UInt nGSPTRs  = 0;
449 
450    /* Marshal args for a call, do the call, and clear the stack.
451       Complexities to consider:
452 
453       * The return type can be I{64,32,16,8} or V128.  In the V128
454         case, it is expected that |args| will contain the special
455         node IRExpr_VECRET(), in which case this routine generates
456         code to allocate space on the stack for the vector return
457         value.  Since we are not passing any scalars on the stack, it
458         is enough to preallocate the return space before marshalling
459         any arguments, in this case.
460 
461         |args| may also contain IRExpr_GSPTR(), in which case the
462         value in %ebp is passed as the corresponding argument.
463 
464       * If the callee claims regparmness of 1, 2 or 3, we must pass the
465         first 1, 2 or 3 args in registers (EAX, EDX, and ECX
466         respectively).  To keep things relatively simple, only args of
467         type I32 may be passed as regparms -- just bomb out if anything
468         else turns up.  Clearly this depends on the front ends not
469         trying to pass any other types as regparms.
470    */
471 
472    /* 16 Nov 2004: the regparm handling is complicated by the
473       following problem.
474 
475       Consider a call two a function with two regparm parameters:
476       f(e1,e2).  We need to compute e1 into %eax and e2 into %edx.
477       Suppose code is first generated to compute e1 into %eax.  Then,
478       code is generated to compute e2 into %edx.  Unfortunately, if
479       the latter code sequence uses %eax, it will trash the value of
480       e1 computed by the former sequence.  This could happen if (for
481       example) e2 itself involved a function call.  In the code below,
482       args are evaluated right-to-left, not left-to-right, but the
483       principle and the problem are the same.
484 
485       One solution is to compute all regparm-bound args into vregs
486       first, and once they are all done, move them to the relevant
487       real regs.  This always gives correct code, but it also gives
488       a bunch of vreg-to-rreg moves which are usually redundant but
489       are hard for the register allocator to get rid of.
490 
491       A compromise is to first examine all regparm'd argument
492       expressions.  If they are all so simple that it is clear
493       they will be evaluated without use of any fixed registers,
494       use the old compute-directly-to-fixed-target scheme.  If not,
495       be safe and use the via-vregs scheme.
496 
497       Note this requires being able to examine an expression and
498       determine whether or not evaluation of it might use a fixed
499       register.  That requires knowledge of how the rest of this
500       insn selector works.  Currently just the following 3 are
501       regarded as safe -- hopefully they cover the majority of
502       arguments in practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
503    */
504    vassert(cee->regparms >= 0 && cee->regparms <= 3);
505 
506    /* Count the number of args and also the VECRETs */
507    n_args = n_arg_ws = 0;
508    while (args[n_args]) {
509       IRExpr* arg = args[n_args];
510       n_args++;
511       if (UNLIKELY(arg->tag == Iex_VECRET)) {
512          nVECRETs++;
513       } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
514          nGSPTRs++;
515       }
516    }
517 
518    /* If this fails, the IR is ill-formed */
519    vassert(nGSPTRs == 0 || nGSPTRs == 1);
520 
521    /* If we have a VECRET, allocate space on the stack for the return
522       value, and record the stack pointer after that. */
523    HReg r_vecRetAddr = INVALID_HREG;
524    if (nVECRETs == 1) {
525       vassert(retTy == Ity_V128 || retTy == Ity_V256);
526       vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
527       r_vecRetAddr = newVRegI(env);
528       sub_from_esp(env, 16);
529       addInstr(env, mk_iMOVsd_RR( hregX86_ESP(), r_vecRetAddr ));
530    } else {
531       // If either of these fail, the IR is ill-formed
532       vassert(retTy != Ity_V128 && retTy != Ity_V256);
533       vassert(nVECRETs == 0);
534    }
535 
536    not_done_yet = n_args;
537 
538    stack_limit = cee->regparms;
539 
540    /* ------ BEGIN marshall all arguments ------ */
541 
542    /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
543    for (i = n_args-1; i >= stack_limit; i--) {
544       n_arg_ws += pushArg(env, args[i], r_vecRetAddr);
545       not_done_yet--;
546    }
547 
548    /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
549       registers. */
550 
551    if (cee->regparms > 0) {
552 
553       /* ------ BEGIN deal with regparms ------ */
554 
555       /* deal with regparms, not forgetting %ebp if needed. */
556       argregs[0] = hregX86_EAX();
557       argregs[1] = hregX86_EDX();
558       argregs[2] = hregX86_ECX();
559       tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;
560 
561       argreg = cee->regparms;
562 
563       /* In keeping with big comment above, detect potential danger
564          and use the via-vregs scheme if needed. */
565       danger = False;
566       for (i = stack_limit-1; i >= 0; i--) {
567          if (mightRequireFixedRegs(args[i])) {
568             danger = True;
569             break;
570          }
571       }
572 
573       if (danger) {
574 
575          /* Move via temporaries */
576          argregX = argreg;
577          for (i = stack_limit-1; i >= 0; i--) {
578 
579             if (0) {
580                vex_printf("x86 host: register param is complex: ");
581                ppIRExpr(args[i]);
582                vex_printf("\n");
583             }
584 
585             IRExpr* arg = args[i];
586             argreg--;
587             vassert(argreg >= 0);
588             if (UNLIKELY(arg->tag == Iex_VECRET)) {
589                vassert(0); //ATC
590             }
591             else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
592                vassert(0); //ATC
593             } else {
594                vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
595                tmpregs[argreg] = iselIntExpr_R(env, arg);
596             }
597             not_done_yet--;
598          }
599          for (i = stack_limit-1; i >= 0; i--) {
600             argregX--;
601             vassert(argregX >= 0);
602             addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
603          }
604 
605       } else {
606          /* It's safe to compute all regparm args directly into their
607             target registers. */
608          for (i = stack_limit-1; i >= 0; i--) {
609             IRExpr* arg = args[i];
610             argreg--;
611             vassert(argreg >= 0);
612             if (UNLIKELY(arg->tag == Iex_VECRET)) {
613                vassert(!hregIsInvalid(r_vecRetAddr));
614                addInstr(env, X86Instr_Alu32R(Xalu_MOV,
615                                              X86RMI_Reg(r_vecRetAddr),
616                                              argregs[argreg]));
617             }
618             else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
619                vassert(0); //ATC
620             } else {
621                vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
622                addInstr(env, X86Instr_Alu32R(Xalu_MOV,
623                                              iselIntExpr_RMI(env, arg),
624                                              argregs[argreg]));
625             }
626             not_done_yet--;
627          }
628 
629       }
630 
631       /* ------ END deal with regparms ------ */
632 
633    }
634 
635    vassert(not_done_yet == 0);
636 
637    /* ------ END marshall all arguments ------ */
638 
639    /* Now we can compute the condition.  We can't do it earlier
640       because the argument computations could trash the condition
641       codes.  Be a bit clever to handle the common case where the
642       guard is 1:Bit. */
643    cc = Xcc_ALWAYS;
644    if (guard) {
645       if (guard->tag == Iex_Const
646           && guard->Iex.Const.con->tag == Ico_U1
647           && guard->Iex.Const.con->Ico.U1 == True) {
648          /* unconditional -- do nothing */
649       } else {
650          cc = iselCondCode( env, guard );
651       }
652    }
653 
654    /* Do final checks, set the return values, and generate the call
655       instruction proper. */
656    vassert(*stackAdjustAfterCall == 0);
657    vassert(is_RetLoc_INVALID(*retloc));
658    switch (retTy) {
659          case Ity_INVALID:
660             /* Function doesn't return a value. */
661             *retloc = mk_RetLoc_simple(RLPri_None);
662             break;
663          case Ity_I64:
664             *retloc = mk_RetLoc_simple(RLPri_2Int);
665             break;
666          case Ity_I32: case Ity_I16: case Ity_I8:
667             *retloc = mk_RetLoc_simple(RLPri_Int);
668             break;
669          case Ity_V128:
670             *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
671             *stackAdjustAfterCall = 16;
672             break;
673          case Ity_V256:
674             vassert(0); // ATC
675             *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
676             *stackAdjustAfterCall = 32;
677             break;
678          default:
679             /* IR can denote other possible return types, but we don't
680                handle those here. */
681            vassert(0);
682    }
683 
684    /* Finally, generate the call itself.  This needs the *retloc value
685       set in the switch above, which is why it's at the end. */
686    callHelperAndClearArgs( env, cc, cee, n_arg_ws, *retloc );
687 }
688 
689 
690 /* Given a guest-state array descriptor, an index expression and a
691    bias, generate an X86AMode holding the relevant guest state
692    offset. */
693 
694 static
genGuestArrayOffset(ISelEnv * env,IRRegArray * descr,IRExpr * off,Int bias)695 X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
696                                 IRExpr* off, Int bias )
697 {
698    HReg tmp, roff;
699    Int  elemSz = sizeofIRType(descr->elemTy);
700    Int  nElems = descr->nElems;
701    Int  shift  = 0;
702 
703    /* throw out any cases not generated by an x86 front end.  In
704       theory there might be a day where we need to handle them -- if
705       we ever run non-x86-guest on x86 host. */
706 
707    if (nElems != 8)
708       vpanic("genGuestArrayOffset(x86 host)(1)");
709 
710    switch (elemSz) {
711       case 1:  shift = 0; break;
712       case 4:  shift = 2; break;
713       case 8:  shift = 3; break;
714       default: vpanic("genGuestArrayOffset(x86 host)(2)");
715    }
716 
717    /* Compute off into a reg, %off.  Then return:
718 
719          movl %off, %tmp
720          addl $bias, %tmp  (if bias != 0)
721          andl %tmp, 7
722          ... base(%ebp, %tmp, shift) ...
723    */
724    tmp  = newVRegI(env);
725    roff = iselIntExpr_R(env, off);
726    addInstr(env, mk_iMOVsd_RR(roff, tmp));
727    if (bias != 0) {
728       addInstr(env,
729                X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp));
730    }
731    addInstr(env,
732             X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp));
733    return
734       X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift );
735 }
736 
737 
738 /* Mess with the FPU's rounding mode: set to the default rounding mode
739    (DEFAULT_FPUCW). */
740 static
set_FPU_rounding_default(ISelEnv * env)741 void set_FPU_rounding_default ( ISelEnv* env )
742 {
743    /* pushl $DEFAULT_FPUCW
744       fldcw 0(%esp)
745       addl $4, %esp
746    */
747    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
748    addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
749    addInstr(env, X86Instr_FpLdCW(zero_esp));
750    add_to_esp(env, 4);
751 }
752 
753 
754 /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
755    expression denoting a value in the range 0 .. 3, indicating a round
756    mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
757    the same rounding.
758 */
759 static
set_FPU_rounding_mode(ISelEnv * env,IRExpr * mode)760 void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
761 {
762    HReg rrm  = iselIntExpr_R(env, mode);
763    HReg rrm2 = newVRegI(env);
764    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
765 
766    /* movl  %rrm, %rrm2
767       andl  $3, %rrm2   -- shouldn't be needed; paranoia
768       shll  $10, %rrm2
769       orl   $DEFAULT_FPUCW, %rrm2
770       pushl %rrm2
771       fldcw 0(%esp)
772       addl  $4, %esp
773    */
774    addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
775    addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
776    addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2));
777    addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
778    addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
779    addInstr(env, X86Instr_FpLdCW(zero_esp));
780    add_to_esp(env, 4);
781 }
782 
783 
784 /* Generate !src into a new vector register, and be sure that the code
785    is SSE1 compatible.  Amazing that Intel doesn't offer a less crappy
786    way to do this.
787 */
do_sse_Not128(ISelEnv * env,HReg src)788 static HReg do_sse_Not128 ( ISelEnv* env, HReg src )
789 {
790    HReg dst = newVRegV(env);
791    /* Set dst to zero.  If dst contains a NaN then all hell might
792       break loose after the comparison.  So, first zero it. */
793    addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst));
794    /* And now make it all 1s ... */
795    addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst));
796    /* Finally, xor 'src' into it. */
797    addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst));
798    /* Doesn't that just totally suck? */
799    return dst;
800 }
801 
802 
803 /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
804    after most non-simple FPU operations (simple = +, -, *, / and
805    sqrt).
806 
807    This could be done a lot more efficiently if needed, by loading
808    zero and adding it to the value to be rounded (fldz ; faddp?).
809 */
roundToF64(ISelEnv * env,HReg reg)810 static void roundToF64 ( ISelEnv* env, HReg reg )
811 {
812    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
813    sub_from_esp(env, 8);
814    addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
815    addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
816    add_to_esp(env, 8);
817 }
818 
819 
820 /*---------------------------------------------------------*/
821 /*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
822 /*---------------------------------------------------------*/
823 
824 /* Select insns for an integer-typed expression, and add them to the
825    code list.  Return a reg holding the result.  This reg will be a
826    virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
827    want to modify it, ask for a new vreg, copy it in there, and modify
828    the copy.  The register allocator will do its best to map both
829    vregs to the same real register, so the copies will often disappear
830    later in the game.
831 
832    This should handle expressions of 32, 16 and 8-bit type.  All
833    results are returned in a 32-bit register.  For 16- and 8-bit
834    expressions, the upper 16/24 bits are arbitrary, so you should mask
835    or sign extend partial values if necessary.
836 */
837 
iselIntExpr_R(ISelEnv * env,const IRExpr * e)838 static HReg iselIntExpr_R ( ISelEnv* env, const IRExpr* e )
839 {
840    HReg r = iselIntExpr_R_wrk(env, e);
841    /* sanity checks ... */
842 #  if 0
843    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
844 #  endif
845    vassert(hregClass(r) == HRcInt32);
846    vassert(hregIsVirtual(r));
847    return r;
848 }
849 
850 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_R_wrk(ISelEnv * env,const IRExpr * e)851 static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
852 {
853    MatchInfo mi;
854 
855    IRType ty = typeOfIRExpr(env->type_env,e);
856    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
857 
858    switch (e->tag) {
859 
860    /* --------- TEMP --------- */
861    case Iex_RdTmp: {
862       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
863    }
864 
865    /* --------- LOAD --------- */
866    case Iex_Load: {
867       HReg dst = newVRegI(env);
868       X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
869 
870       /* We can't handle big-endian loads, nor load-linked. */
871       if (e->Iex.Load.end != Iend_LE)
872          goto irreducible;
873 
874       if (ty == Ity_I32) {
875          addInstr(env, X86Instr_Alu32R(Xalu_MOV,
876                                        X86RMI_Mem(amode), dst) );
877          return dst;
878       }
879       if (ty == Ity_I16) {
880          addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
881          return dst;
882       }
883       if (ty == Ity_I8) {
884          addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
885          return dst;
886       }
887       break;
888    }
889 
890    /* --------- TERNARY OP --------- */
891    case Iex_Triop: {
892       IRTriop *triop = e->Iex.Triop.details;
893       /* C3210 flags following FPU partial remainder (fprem), both
894          IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
895       if (triop->op == Iop_PRemC3210F64
896           || triop->op == Iop_PRem1C3210F64) {
897          HReg junk = newVRegF(env);
898          HReg dst  = newVRegI(env);
899          HReg srcL = iselDblExpr(env, triop->arg2);
900          HReg srcR = iselDblExpr(env, triop->arg3);
901          /* XXXROUNDINGFIXME */
902          /* set roundingmode here */
903          addInstr(env, X86Instr_FpBinary(
904                            e->Iex.Binop.op==Iop_PRemC3210F64
905                               ? Xfp_PREM : Xfp_PREM1,
906                            srcL,srcR,junk
907                  ));
908          /* The previous pseudo-insn will have left the FPU's C3210
909             flags set correctly.  So bag them. */
910          addInstr(env, X86Instr_FpStSW_AX());
911          addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
912          addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
913          return dst;
914       }
915 
916       break;
917    }
918 
919    /* --------- BINARY OP --------- */
920    case Iex_Binop: {
921       X86AluOp   aluOp;
922       X86ShiftOp shOp;
923 
924       /* Pattern: Sub32(0,x) */
925       if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
926          HReg dst = newVRegI(env);
927          HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
928          addInstr(env, mk_iMOVsd_RR(reg,dst));
929          addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
930          return dst;
931       }
932 
933       /* Is it an addition or logical style op? */
934       switch (e->Iex.Binop.op) {
935          case Iop_Add8: case Iop_Add16: case Iop_Add32:
936             aluOp = Xalu_ADD; break;
937          case Iop_Sub8: case Iop_Sub16: case Iop_Sub32:
938             aluOp = Xalu_SUB; break;
939          case Iop_And8: case Iop_And16: case Iop_And32:
940             aluOp = Xalu_AND; break;
941          case Iop_Or8: case Iop_Or16: case Iop_Or32:
942             aluOp = Xalu_OR; break;
943          case Iop_Xor8: case Iop_Xor16: case Iop_Xor32:
944             aluOp = Xalu_XOR; break;
945          case Iop_Mul16: case Iop_Mul32:
946             aluOp = Xalu_MUL; break;
947          default:
948             aluOp = Xalu_INVALID; break;
949       }
950       /* For commutative ops we assume any literal
951          values are on the second operand. */
952       if (aluOp != Xalu_INVALID) {
953          HReg dst    = newVRegI(env);
954          HReg reg    = iselIntExpr_R(env, e->Iex.Binop.arg1);
955          X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
956          addInstr(env, mk_iMOVsd_RR(reg,dst));
957          addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst));
958          return dst;
959       }
960       /* Could do better here; forcing the first arg into a reg
961          isn't always clever.
962          -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)),
963                         LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32(
964                         t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32)))
965             movl 0xFFFFFFA0(%vr41),%vr107
966             movl 0xFFFFFFA4(%vr41),%vr108
967             movl %vr107,%vr106
968             xorl %vr108,%vr106
969             movl 0xFFFFFFA8(%vr41),%vr109
970             movl %vr106,%vr105
971             andl %vr109,%vr105
972             movl 0xFFFFFFA0(%vr41),%vr110
973             movl %vr105,%vr104
974             xorl %vr110,%vr104
975             movl %vr104,%vr70
976       */
977 
978       /* Perhaps a shift op? */
979       switch (e->Iex.Binop.op) {
980          case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
981             shOp = Xsh_SHL; break;
982          case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
983             shOp = Xsh_SHR; break;
984          case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
985             shOp = Xsh_SAR; break;
986          default:
987             shOp = Xsh_INVALID; break;
988       }
989       if (shOp != Xsh_INVALID) {
990          HReg dst = newVRegI(env);
991 
992          /* regL = the value to be shifted */
993          HReg regL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
994          addInstr(env, mk_iMOVsd_RR(regL,dst));
995 
996          /* Do any necessary widening for 16/8 bit operands */
997          switch (e->Iex.Binop.op) {
998             case Iop_Shr8:
999                addInstr(env, X86Instr_Alu32R(
1000                                 Xalu_AND, X86RMI_Imm(0xFF), dst));
1001                break;
1002             case Iop_Shr16:
1003                addInstr(env, X86Instr_Alu32R(
1004                                 Xalu_AND, X86RMI_Imm(0xFFFF), dst));
1005                break;
1006             case Iop_Sar8:
1007                addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst));
1008                addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst));
1009                break;
1010             case Iop_Sar16:
1011                addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst));
1012                addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst));
1013                break;
1014             default: break;
1015          }
1016 
1017          /* Now consider the shift amount.  If it's a literal, we
1018             can do a much better job than the general case. */
1019          if (e->Iex.Binop.arg2->tag == Iex_Const) {
1020             /* assert that the IR is well-typed */
1021             Int nshift;
1022             vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1023             nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1024 	    vassert(nshift >= 0);
1025 	    if (nshift > 0)
1026                /* Can't allow nshift==0 since that means %cl */
1027                addInstr(env, X86Instr_Sh32( shOp, nshift, dst ));
1028          } else {
1029             /* General case; we have to force the amount into %cl. */
1030             HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1031             addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX()));
1032             addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst));
1033          }
1034          return dst;
1035       }
1036 
1037       /* Handle misc other ops. */
1038 
1039       if (e->Iex.Binop.op == Iop_Max32U) {
1040          HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1041          HReg dst  = newVRegI(env);
1042          HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
1043          addInstr(env, mk_iMOVsd_RR(src1,dst));
1044          addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
1045          addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
1046          return dst;
1047       }
1048 
1049       if (e->Iex.Binop.op == Iop_8HLto16) {
1050          HReg hi8  = newVRegI(env);
1051          HReg lo8  = newVRegI(env);
1052          HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1053          HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1054          addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
1055          addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
1056          addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8));
1057          addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
1058          addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
1059          return hi8;
1060       }
1061 
1062       if (e->Iex.Binop.op == Iop_16HLto32) {
1063          HReg hi16  = newVRegI(env);
1064          HReg lo16  = newVRegI(env);
1065          HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1066          HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1067          addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
1068          addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
1069          addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16));
1070          addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
1071          addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
1072          return hi16;
1073       }
1074 
1075       if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8
1076           || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) {
1077          HReg a16   = newVRegI(env);
1078          HReg b16   = newVRegI(env);
1079          HReg a16s  = iselIntExpr_R(env, e->Iex.Binop.arg1);
1080          HReg b16s  = iselIntExpr_R(env, e->Iex.Binop.arg2);
1081          Int  shift = (e->Iex.Binop.op == Iop_MullS8
1082                        || e->Iex.Binop.op == Iop_MullU8)
1083                          ? 24 : 16;
1084          X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8
1085                               || e->Iex.Binop.op == Iop_MullS16)
1086                                 ? Xsh_SAR : Xsh_SHR;
1087 
1088          addInstr(env, mk_iMOVsd_RR(a16s, a16));
1089          addInstr(env, mk_iMOVsd_RR(b16s, b16));
1090          addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16));
1091          addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16));
1092          addInstr(env, X86Instr_Sh32(shr_op,  shift, a16));
1093          addInstr(env, X86Instr_Sh32(shr_op,  shift, b16));
1094          addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16));
1095          return b16;
1096       }
1097 
1098       if (e->Iex.Binop.op == Iop_CmpF64) {
1099          HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
1100          HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
1101          HReg dst = newVRegI(env);
1102          addInstr(env, X86Instr_FpCmp(fL,fR,dst));
1103          /* shift this right 8 bits so as to conform to CmpF64
1104             definition. */
1105          addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst));
1106          return dst;
1107       }
1108 
1109       if (e->Iex.Binop.op == Iop_F64toI32S
1110           || e->Iex.Binop.op == Iop_F64toI16S) {
1111          Int  sz  = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4;
1112          HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
1113          HReg dst = newVRegI(env);
1114 
1115          /* Used several times ... */
1116          X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1117 
1118 	 /* rf now holds the value to be converted, and rrm holds the
1119 	    rounding mode value, encoded as per the IRRoundingMode
1120 	    enum.  The first thing to do is set the FPU's rounding
1121 	    mode accordingly. */
1122 
1123          /* Create a space for the format conversion. */
1124          /* subl $4, %esp */
1125          sub_from_esp(env, 4);
1126 
1127 	 /* Set host rounding mode */
1128 	 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
1129 
1130          /* gistw/l %rf, 0(%esp) */
1131          addInstr(env, X86Instr_FpLdStI(False/*store*/,
1132                                         toUChar(sz), rf, zero_esp));
1133 
1134          if (sz == 2) {
1135             /* movzwl 0(%esp), %dst */
1136             addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
1137          } else {
1138             /* movl 0(%esp), %dst */
1139             vassert(sz == 4);
1140             addInstr(env, X86Instr_Alu32R(
1141                              Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1142          }
1143 
1144 	 /* Restore default FPU rounding. */
1145          set_FPU_rounding_default( env );
1146 
1147          /* addl $4, %esp */
1148 	 add_to_esp(env, 4);
1149          return dst;
1150       }
1151 
1152       break;
1153    }
1154 
1155    /* --------- UNARY OP --------- */
1156    case Iex_Unop: {
1157 
1158       /* 1Uto8(32to1(expr32)) */
1159       if (e->Iex.Unop.op == Iop_1Uto8) {
1160          DECLARE_PATTERN(p_32to1_then_1Uto8);
1161          DEFINE_PATTERN(p_32to1_then_1Uto8,
1162                         unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1163          if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1164             const IRExpr* expr32 = mi.bindee[0];
1165             HReg dst = newVRegI(env);
1166             HReg src = iselIntExpr_R(env, expr32);
1167             addInstr(env, mk_iMOVsd_RR(src,dst) );
1168             addInstr(env, X86Instr_Alu32R(Xalu_AND,
1169                                           X86RMI_Imm(1), dst));
1170             return dst;
1171          }
1172       }
1173 
1174       /* 8Uto32(LDle(expr32)) */
1175       if (e->Iex.Unop.op == Iop_8Uto32) {
1176          DECLARE_PATTERN(p_LDle8_then_8Uto32);
1177          DEFINE_PATTERN(p_LDle8_then_8Uto32,
1178                         unop(Iop_8Uto32,
1179                              IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1180          if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1181             HReg dst = newVRegI(env);
1182             X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1183             addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1184             return dst;
1185          }
1186       }
1187 
1188       /* 8Sto32(LDle(expr32)) */
1189       if (e->Iex.Unop.op == Iop_8Sto32) {
1190          DECLARE_PATTERN(p_LDle8_then_8Sto32);
1191          DEFINE_PATTERN(p_LDle8_then_8Sto32,
1192                         unop(Iop_8Sto32,
1193                              IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1194          if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1195             HReg dst = newVRegI(env);
1196             X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1197             addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1198             return dst;
1199          }
1200       }
1201 
1202       /* 16Uto32(LDle(expr32)) */
1203       if (e->Iex.Unop.op == Iop_16Uto32) {
1204          DECLARE_PATTERN(p_LDle16_then_16Uto32);
1205          DEFINE_PATTERN(p_LDle16_then_16Uto32,
1206                         unop(Iop_16Uto32,
1207                              IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1208          if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1209             HReg dst = newVRegI(env);
1210             X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1211             addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1212             return dst;
1213          }
1214       }
1215 
1216       /* 8Uto32(GET:I8) */
1217       if (e->Iex.Unop.op == Iop_8Uto32) {
1218          if (e->Iex.Unop.arg->tag == Iex_Get) {
1219             HReg      dst;
1220             X86AMode* amode;
1221             vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1222             dst = newVRegI(env);
1223             amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1224                                 hregX86_EBP());
1225             addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1226             return dst;
1227          }
1228       }
1229 
1230       /* 16to32(GET:I16) */
1231       if (e->Iex.Unop.op == Iop_16Uto32) {
1232          if (e->Iex.Unop.arg->tag == Iex_Get) {
1233             HReg      dst;
1234             X86AMode* amode;
1235             vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1236             dst = newVRegI(env);
1237             amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1238                                 hregX86_EBP());
1239             addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1240             return dst;
1241          }
1242       }
1243 
1244       switch (e->Iex.Unop.op) {
1245          case Iop_8Uto16:
1246          case Iop_8Uto32:
1247          case Iop_16Uto32: {
1248             HReg dst = newVRegI(env);
1249             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1250             UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1251             addInstr(env, mk_iMOVsd_RR(src,dst) );
1252             addInstr(env, X86Instr_Alu32R(Xalu_AND,
1253                                           X86RMI_Imm(mask), dst));
1254             return dst;
1255          }
1256          case Iop_8Sto16:
1257          case Iop_8Sto32:
1258          case Iop_16Sto32: {
1259             HReg dst = newVRegI(env);
1260             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1261             UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24;
1262             addInstr(env, mk_iMOVsd_RR(src,dst) );
1263             addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst));
1264             addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst));
1265             return dst;
1266          }
1267 	 case Iop_Not8:
1268 	 case Iop_Not16:
1269          case Iop_Not32: {
1270             HReg dst = newVRegI(env);
1271             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1272             addInstr(env, mk_iMOVsd_RR(src,dst) );
1273             addInstr(env, X86Instr_Unary32(Xun_NOT,dst));
1274             return dst;
1275          }
1276          case Iop_64HIto32: {
1277             HReg rHi, rLo;
1278             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1279             return rHi; /* and abandon rLo .. poor wee thing :-) */
1280          }
1281          case Iop_64to32: {
1282             HReg rHi, rLo;
1283             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1284             return rLo; /* similar stupid comment to the above ... */
1285          }
1286          case Iop_16HIto8:
1287          case Iop_32HIto16: {
1288             HReg dst  = newVRegI(env);
1289             HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
1290             Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1291             addInstr(env, mk_iMOVsd_RR(src,dst) );
1292             addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1293             return dst;
1294          }
1295          case Iop_1Uto32:
1296          case Iop_1Uto8: {
1297             HReg dst         = newVRegI(env);
1298             X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1299             addInstr(env, X86Instr_Set32(cond,dst));
1300             return dst;
1301          }
1302          case Iop_1Sto8:
1303          case Iop_1Sto16:
1304          case Iop_1Sto32: {
1305             /* could do better than this, but for now ... */
1306             HReg dst         = newVRegI(env);
1307             X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1308             addInstr(env, X86Instr_Set32(cond,dst));
1309             addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1310             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1311             return dst;
1312          }
1313          case Iop_Ctz32: {
1314             /* Count trailing zeroes, implemented by x86 'bsfl' */
1315             HReg dst = newVRegI(env);
1316             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1317             addInstr(env, X86Instr_Bsfr32(True,src,dst));
1318             return dst;
1319          }
1320          case Iop_Clz32: {
1321             /* Count leading zeroes.  Do 'bsrl' to establish the index
1322                of the highest set bit, and subtract that value from
1323                31. */
1324             HReg tmp = newVRegI(env);
1325             HReg dst = newVRegI(env);
1326             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1327             addInstr(env, X86Instr_Bsfr32(False,src,tmp));
1328             addInstr(env, X86Instr_Alu32R(Xalu_MOV,
1329                                           X86RMI_Imm(31), dst));
1330             addInstr(env, X86Instr_Alu32R(Xalu_SUB,
1331                                           X86RMI_Reg(tmp), dst));
1332             return dst;
1333          }
1334 
1335          case Iop_CmpwNEZ32: {
1336             HReg dst = newVRegI(env);
1337             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1338             addInstr(env, mk_iMOVsd_RR(src,dst));
1339             addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
1340             addInstr(env, X86Instr_Alu32R(Xalu_OR,
1341                                           X86RMI_Reg(src), dst));
1342             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1343             return dst;
1344          }
1345          case Iop_Left8:
1346          case Iop_Left16:
1347          case Iop_Left32: {
1348             HReg dst = newVRegI(env);
1349             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1350             addInstr(env, mk_iMOVsd_RR(src, dst));
1351             addInstr(env, X86Instr_Unary32(Xun_NEG, dst));
1352             addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst));
1353             return dst;
1354          }
1355 
1356          case Iop_V128to32: {
1357             HReg      dst  = newVRegI(env);
1358             HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
1359             X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1360             sub_from_esp(env, 16);
1361             addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1362             addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1363             add_to_esp(env, 16);
1364             return dst;
1365          }
1366 
1367          /* ReinterpF32asI32(e) */
1368          /* Given an IEEE754 single, produce an I32 with the same bit
1369             pattern.  Keep stack 8-aligned even though only using 4
1370             bytes. */
1371          case Iop_ReinterpF32asI32: {
1372             HReg rf   = iselFltExpr(env, e->Iex.Unop.arg);
1373             HReg dst  = newVRegI(env);
1374             X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1375             /* paranoia */
1376             set_FPU_rounding_default(env);
1377             /* subl $8, %esp */
1378             sub_from_esp(env, 8);
1379             /* gstF %rf, 0(%esp) */
1380             addInstr(env,
1381                      X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp));
1382             /* movl 0(%esp), %dst */
1383             addInstr(env,
1384                      X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1385             /* addl $8, %esp */
1386             add_to_esp(env, 8);
1387             return dst;
1388          }
1389 
1390          case Iop_16to8:
1391          case Iop_32to8:
1392          case Iop_32to16:
1393             /* These are no-ops. */
1394             return iselIntExpr_R(env, e->Iex.Unop.arg);
1395 
1396          case Iop_GetMSBs8x8: {
1397             /* Note: the following assumes the helper is of
1398                signature
1399                   UInt fn ( ULong ), and is not a regparm fn.
1400             */
1401             HReg  xLo, xHi;
1402             HReg  dst = newVRegI(env);
1403             Addr fn = (Addr)h_generic_calc_GetMSBs8x8;
1404             iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
1405             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
1406             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
1407             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
1408                                          0, mk_RetLoc_simple(RLPri_Int) ));
1409             add_to_esp(env, 2*4);
1410             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1411             return dst;
1412          }
1413 
1414          default:
1415             break;
1416       }
1417       break;
1418    }
1419 
1420    /* --------- GET --------- */
1421    case Iex_Get: {
1422       if (ty == Ity_I32) {
1423          HReg dst = newVRegI(env);
1424          addInstr(env, X86Instr_Alu32R(
1425                           Xalu_MOV,
1426                           X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1427                                                  hregX86_EBP())),
1428                           dst));
1429          return dst;
1430       }
1431       if (ty == Ity_I8 || ty == Ity_I16) {
1432          HReg dst = newVRegI(env);
1433          addInstr(env, X86Instr_LoadEX(
1434                           toUChar(ty==Ity_I8 ? 1 : 2),
1435                           False,
1436                           X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1437                           dst));
1438          return dst;
1439       }
1440       break;
1441    }
1442 
1443    case Iex_GetI: {
1444       X86AMode* am
1445          = genGuestArrayOffset(
1446               env, e->Iex.GetI.descr,
1447                    e->Iex.GetI.ix, e->Iex.GetI.bias );
1448       HReg dst = newVRegI(env);
1449       if (ty == Ity_I8) {
1450          addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1451          return dst;
1452       }
1453       if (ty == Ity_I32) {
1454          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1455          return dst;
1456       }
1457       break;
1458    }
1459 
1460    /* --------- CCALL --------- */
1461    case Iex_CCall: {
1462       HReg    dst = newVRegI(env);
1463       vassert(ty == e->Iex.CCall.retty);
1464 
1465       /* be very restrictive for now.  Only 32/64-bit ints allowed for
1466          args, and 32 bits for return type.  Don't forget to change
1467          the RetLoc if more return types are allowed in future. */
1468       if (e->Iex.CCall.retty != Ity_I32)
1469          goto irreducible;
1470 
1471       /* Marshal args, do the call, clear stack. */
1472       UInt   addToSp = 0;
1473       RetLoc rloc    = mk_RetLoc_INVALID();
1474       doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1475                     e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
1476       vassert(is_sane_RetLoc(rloc));
1477       vassert(rloc.pri == RLPri_Int);
1478       vassert(addToSp == 0);
1479 
1480       addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1481       return dst;
1482    }
1483 
1484    /* --------- LITERAL --------- */
1485    /* 32/16/8-bit literals */
1486    case Iex_Const: {
1487       X86RMI* rmi = iselIntExpr_RMI ( env, e );
1488       HReg    r   = newVRegI(env);
1489       addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r));
1490       return r;
1491    }
1492 
1493    /* --------- MULTIPLEX --------- */
1494    case Iex_ITE: { // VFD
1495      if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
1496          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
1497         HReg   r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
1498         X86RM* r0  = iselIntExpr_RM(env, e->Iex.ITE.iffalse);
1499         HReg   dst = newVRegI(env);
1500         addInstr(env, mk_iMOVsd_RR(r1,dst));
1501         X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
1502         addInstr(env, X86Instr_CMov32(cc ^ 1, r0, dst));
1503         return dst;
1504       }
1505       break;
1506    }
1507 
1508    default:
1509    break;
1510    } /* switch (e->tag) */
1511 
1512    /* We get here if no pattern matched. */
1513   irreducible:
1514    ppIRExpr(e);
1515    vpanic("iselIntExpr_R: cannot reduce tree");
1516 }
1517 
1518 
1519 /*---------------------------------------------------------*/
1520 /*--- ISEL: Integer expression auxiliaries              ---*/
1521 /*---------------------------------------------------------*/
1522 
1523 /* --------------------- AMODEs --------------------- */
1524 
1525 /* Return an AMode which computes the value of the specified
1526    expression, possibly also adding insns to the code list as a
1527    result.  The expression may only be a 32-bit one.
1528 */
1529 
sane_AMode(X86AMode * am)1530 static Bool sane_AMode ( X86AMode* am )
1531 {
1532    switch (am->tag) {
1533       case Xam_IR:
1534          return
1535             toBool( hregClass(am->Xam.IR.reg) == HRcInt32
1536                     && (hregIsVirtual(am->Xam.IR.reg)
1537                         || sameHReg(am->Xam.IR.reg, hregX86_EBP())) );
1538       case Xam_IRRS:
1539          return
1540             toBool( hregClass(am->Xam.IRRS.base) == HRcInt32
1541                     && hregIsVirtual(am->Xam.IRRS.base)
1542                     && hregClass(am->Xam.IRRS.index) == HRcInt32
1543                     && hregIsVirtual(am->Xam.IRRS.index) );
1544       default:
1545         vpanic("sane_AMode: unknown x86 amode tag");
1546    }
1547 }
1548 
iselIntExpr_AMode(ISelEnv * env,const IRExpr * e)1549 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, const IRExpr* e )
1550 {
1551    X86AMode* am = iselIntExpr_AMode_wrk(env, e);
1552    vassert(sane_AMode(am));
1553    return am;
1554 }
1555 
1556 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_AMode_wrk(ISelEnv * env,const IRExpr * e)1557 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e )
1558 {
1559    IRType ty = typeOfIRExpr(env->type_env,e);
1560    vassert(ty == Ity_I32);
1561 
1562    /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */
1563    if (e->tag == Iex_Binop
1564        && e->Iex.Binop.op == Iop_Add32
1565        && e->Iex.Binop.arg2->tag == Iex_Const
1566        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
1567        && e->Iex.Binop.arg1->tag == Iex_Binop
1568        && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32
1569        && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop
1570        && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1571        && e->Iex.Binop.arg1
1572            ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1573        && e->Iex.Binop.arg1
1574            ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1575       UInt shift = e->Iex.Binop.arg1
1576                     ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1577       UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1578       if (shift == 1 || shift == 2 || shift == 3) {
1579          HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1);
1580          HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1
1581                                        ->Iex.Binop.arg2->Iex.Binop.arg1 );
1582          return X86AMode_IRRS(imm32, r1, r2, shift);
1583       }
1584    }
1585 
1586    /* Add32(expr1, Shl32(expr2, imm)) */
1587    if (e->tag == Iex_Binop
1588        && e->Iex.Binop.op == Iop_Add32
1589        && e->Iex.Binop.arg2->tag == Iex_Binop
1590        && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1591        && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1592        && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1593       UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1594       if (shift == 1 || shift == 2 || shift == 3) {
1595          HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1596          HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
1597          return X86AMode_IRRS(0, r1, r2, shift);
1598       }
1599    }
1600 
1601    /* Add32(expr,i) */
1602    if (e->tag == Iex_Binop
1603        && e->Iex.Binop.op == Iop_Add32
1604        && e->Iex.Binop.arg2->tag == Iex_Const
1605        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
1606       HReg r1 = iselIntExpr_R(env,  e->Iex.Binop.arg1);
1607       return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1);
1608    }
1609 
1610    /* Doesn't match anything in particular.  Generate it into
1611       a register and use that. */
1612    {
1613       HReg r1 = iselIntExpr_R(env, e);
1614       return X86AMode_IR(0, r1);
1615    }
1616 }
1617 
1618 
1619 /* --------------------- RMIs --------------------- */
1620 
1621 /* Similarly, calculate an expression into an X86RMI operand.  As with
1622    iselIntExpr_R, the expression can have type 32, 16 or 8 bits.  */
1623 
iselIntExpr_RMI(ISelEnv * env,const IRExpr * e)1624 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, const IRExpr* e )
1625 {
1626    X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);
1627    /* sanity checks ... */
1628    switch (rmi->tag) {
1629       case Xrmi_Imm:
1630          return rmi;
1631       case Xrmi_Reg:
1632          vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
1633          vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
1634          return rmi;
1635       case Xrmi_Mem:
1636          vassert(sane_AMode(rmi->Xrmi.Mem.am));
1637          return rmi;
1638       default:
1639          vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
1640    }
1641 }
1642 
1643 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RMI_wrk(ISelEnv * env,const IRExpr * e)1644 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e )
1645 {
1646    IRType ty = typeOfIRExpr(env->type_env,e);
1647    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1648 
1649    /* special case: immediate */
1650    if (e->tag == Iex_Const) {
1651       UInt u;
1652       switch (e->Iex.Const.con->tag) {
1653          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1654          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1655          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1656          default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
1657       }
1658       return X86RMI_Imm(u);
1659    }
1660 
1661    /* special case: 32-bit GET */
1662    if (e->tag == Iex_Get && ty == Ity_I32) {
1663       return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1664                                     hregX86_EBP()));
1665    }
1666 
1667    /* special case: 32-bit load from memory */
1668    if (e->tag == Iex_Load && ty == Ity_I32
1669        && e->Iex.Load.end == Iend_LE) {
1670       X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
1671       return X86RMI_Mem(am);
1672    }
1673 
1674    /* default case: calculate into a register and return that */
1675    {
1676       HReg r = iselIntExpr_R ( env, e );
1677       return X86RMI_Reg(r);
1678    }
1679 }
1680 
1681 
1682 /* --------------------- RIs --------------------- */
1683 
1684 /* Calculate an expression into an X86RI operand.  As with
1685    iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1686 
iselIntExpr_RI(ISelEnv * env,const IRExpr * e)1687 static X86RI* iselIntExpr_RI ( ISelEnv* env, const IRExpr* e )
1688 {
1689    X86RI* ri = iselIntExpr_RI_wrk(env, e);
1690    /* sanity checks ... */
1691    switch (ri->tag) {
1692       case Xri_Imm:
1693          return ri;
1694       case Xri_Reg:
1695          vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
1696          vassert(hregIsVirtual(ri->Xri.Reg.reg));
1697          return ri;
1698       default:
1699          vpanic("iselIntExpr_RI: unknown x86 RI tag");
1700    }
1701 }
1702 
1703 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RI_wrk(ISelEnv * env,const IRExpr * e)1704 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e )
1705 {
1706    IRType ty = typeOfIRExpr(env->type_env,e);
1707    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1708 
1709    /* special case: immediate */
1710    if (e->tag == Iex_Const) {
1711       UInt u;
1712       switch (e->Iex.Const.con->tag) {
1713          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1714          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1715          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1716          default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
1717       }
1718       return X86RI_Imm(u);
1719    }
1720 
1721    /* default case: calculate into a register and return that */
1722    {
1723       HReg r = iselIntExpr_R ( env, e );
1724       return X86RI_Reg(r);
1725    }
1726 }
1727 
1728 
1729 /* --------------------- RMs --------------------- */
1730 
1731 /* Similarly, calculate an expression into an X86RM operand.  As with
1732    iselIntExpr_R, the expression can have type 32, 16 or 8 bits.  */
1733 
iselIntExpr_RM(ISelEnv * env,const IRExpr * e)1734 static X86RM* iselIntExpr_RM ( ISelEnv* env, const IRExpr* e )
1735 {
1736    X86RM* rm = iselIntExpr_RM_wrk(env, e);
1737    /* sanity checks ... */
1738    switch (rm->tag) {
1739       case Xrm_Reg:
1740          vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
1741          vassert(hregIsVirtual(rm->Xrm.Reg.reg));
1742          return rm;
1743       case Xrm_Mem:
1744          vassert(sane_AMode(rm->Xrm.Mem.am));
1745          return rm;
1746       default:
1747          vpanic("iselIntExpr_RM: unknown x86 RM tag");
1748    }
1749 }
1750 
1751 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RM_wrk(ISelEnv * env,const IRExpr * e)1752 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e )
1753 {
1754    IRType ty = typeOfIRExpr(env->type_env,e);
1755    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1756 
1757    /* special case: 32-bit GET */
1758    if (e->tag == Iex_Get && ty == Ity_I32) {
1759       return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
1760                                    hregX86_EBP()));
1761    }
1762 
1763    /* special case: load from memory */
1764 
1765    /* default case: calculate into a register and return that */
1766    {
1767       HReg r = iselIntExpr_R ( env, e );
1768       return X86RM_Reg(r);
1769    }
1770 }
1771 
1772 
1773 /* --------------------- CONDCODE --------------------- */
1774 
1775 /* Generate code to evaluated a bit-typed expression, returning the
1776    condition code which would correspond when the expression would
1777    notionally have returned 1. */
1778 
iselCondCode(ISelEnv * env,const IRExpr * e)1779 static X86CondCode iselCondCode ( ISelEnv* env, const IRExpr* e )
1780 {
1781    /* Uh, there's nothing we can sanity check here, unfortunately. */
1782    return iselCondCode_wrk(env,e);
1783 }
1784 
1785 /* DO NOT CALL THIS DIRECTLY ! */
iselCondCode_wrk(ISelEnv * env,const IRExpr * e)1786 static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e )
1787 {
1788    MatchInfo mi;
1789 
1790    vassert(e);
1791    vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1792 
1793    /* var */
1794    if (e->tag == Iex_RdTmp) {
1795       HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1796       /* Test32 doesn't modify r32; so this is OK. */
1797       addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
1798       return Xcc_NZ;
1799    }
1800 
1801    /* Constant 1:Bit */
1802    if (e->tag == Iex_Const) {
1803       HReg r;
1804       vassert(e->Iex.Const.con->tag == Ico_U1);
1805       vassert(e->Iex.Const.con->Ico.U1 == True
1806               || e->Iex.Const.con->Ico.U1 == False);
1807       r = newVRegI(env);
1808       addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
1809       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
1810       return e->Iex.Const.con->Ico.U1 ? Xcc_Z : Xcc_NZ;
1811    }
1812 
1813    /* Not1(e) */
1814    if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1815       /* Generate code for the arg, and negate the test condition */
1816       return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
1817    }
1818 
1819    /* --- patterns rooted at: 32to1 --- */
1820 
1821    if (e->tag == Iex_Unop
1822        && e->Iex.Unop.op == Iop_32to1) {
1823       X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1824       addInstr(env, X86Instr_Test32(1,rm));
1825       return Xcc_NZ;
1826    }
1827 
1828    /* --- patterns rooted at: CmpNEZ8 --- */
1829 
1830    /* CmpNEZ8(x) */
1831    if (e->tag == Iex_Unop
1832        && e->Iex.Unop.op == Iop_CmpNEZ8) {
1833       X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1834       addInstr(env, X86Instr_Test32(0xFF,rm));
1835       return Xcc_NZ;
1836    }
1837 
1838    /* --- patterns rooted at: CmpNEZ16 --- */
1839 
1840    /* CmpNEZ16(x) */
1841    if (e->tag == Iex_Unop
1842        && e->Iex.Unop.op == Iop_CmpNEZ16) {
1843       X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1844       addInstr(env, X86Instr_Test32(0xFFFF,rm));
1845       return Xcc_NZ;
1846    }
1847 
1848    /* --- patterns rooted at: CmpNEZ32 --- */
1849 
1850    /* CmpNEZ32(And32(x,y)) */
1851    {
1852       DECLARE_PATTERN(p_CmpNEZ32_And32);
1853       DEFINE_PATTERN(p_CmpNEZ32_And32,
1854                      unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
1855       if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
1856          HReg    r0   = iselIntExpr_R(env, mi.bindee[0]);
1857          X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1858          HReg    tmp  = newVRegI(env);
1859          addInstr(env, mk_iMOVsd_RR(r0, tmp));
1860          addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
1861          return Xcc_NZ;
1862       }
1863    }
1864 
1865    /* CmpNEZ32(Or32(x,y)) */
1866    {
1867       DECLARE_PATTERN(p_CmpNEZ32_Or32);
1868       DEFINE_PATTERN(p_CmpNEZ32_Or32,
1869                      unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
1870       if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
1871          HReg    r0   = iselIntExpr_R(env, mi.bindee[0]);
1872          X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1873          HReg    tmp  = newVRegI(env);
1874          addInstr(env, mk_iMOVsd_RR(r0, tmp));
1875          addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
1876          return Xcc_NZ;
1877       }
1878    }
1879 
1880    /* CmpNEZ32(GET(..):I32) */
1881    if (e->tag == Iex_Unop
1882        && e->Iex.Unop.op == Iop_CmpNEZ32
1883        && e->Iex.Unop.arg->tag == Iex_Get) {
1884       X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1885                                  hregX86_EBP());
1886       addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am));
1887       return Xcc_NZ;
1888    }
1889 
1890    /* CmpNEZ32(x) */
1891    if (e->tag == Iex_Unop
1892        && e->Iex.Unop.op == Iop_CmpNEZ32) {
1893       HReg    r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1894       X86RMI* rmi2 = X86RMI_Imm(0);
1895       addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
1896       return Xcc_NZ;
1897    }
1898 
1899    /* --- patterns rooted at: CmpNEZ64 --- */
1900 
1901    /* CmpNEZ64(Or64(x,y)) */
1902    {
1903       DECLARE_PATTERN(p_CmpNEZ64_Or64);
1904       DEFINE_PATTERN(p_CmpNEZ64_Or64,
1905                      unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
1906       if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
1907          HReg    hi1, lo1, hi2, lo2;
1908          HReg    tmp  = newVRegI(env);
1909          iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
1910          addInstr(env, mk_iMOVsd_RR(hi1, tmp));
1911          addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
1912          iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
1913          addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
1914          addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
1915          return Xcc_NZ;
1916       }
1917    }
1918 
1919    /* CmpNEZ64(x) */
1920    if (e->tag == Iex_Unop
1921        && e->Iex.Unop.op == Iop_CmpNEZ64) {
1922       HReg hi, lo;
1923       HReg tmp = newVRegI(env);
1924       iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
1925       addInstr(env, mk_iMOVsd_RR(hi, tmp));
1926       addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
1927       return Xcc_NZ;
1928    }
1929 
1930    /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */
1931 
1932    /* CmpEQ8 / CmpNE8 */
1933    if (e->tag == Iex_Binop
1934        && (e->Iex.Binop.op == Iop_CmpEQ8
1935            || e->Iex.Binop.op == Iop_CmpNE8
1936            || e->Iex.Binop.op == Iop_CasCmpEQ8
1937            || e->Iex.Binop.op == Iop_CasCmpNE8)) {
1938       if (isZeroU8(e->Iex.Binop.arg2)) {
1939          HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1940          addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
1941          switch (e->Iex.Binop.op) {
1942             case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1943             case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1944             default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
1945          }
1946       } else {
1947          HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1948          X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1949          HReg    r    = newVRegI(env);
1950          addInstr(env, mk_iMOVsd_RR(r1,r));
1951          addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1952          addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
1953          switch (e->Iex.Binop.op) {
1954             case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1955             case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1956             default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
1957          }
1958       }
1959    }
1960 
1961    /* CmpEQ16 / CmpNE16 */
1962    if (e->tag == Iex_Binop
1963        && (e->Iex.Binop.op == Iop_CmpEQ16
1964            || e->Iex.Binop.op == Iop_CmpNE16
1965            || e->Iex.Binop.op == Iop_CasCmpEQ16
1966            || e->Iex.Binop.op == Iop_CasCmpNE16
1967            || e->Iex.Binop.op == Iop_ExpCmpNE16)) {
1968       HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1969       X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1970       HReg    r    = newVRegI(env);
1971       addInstr(env, mk_iMOVsd_RR(r1,r));
1972       addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1973       addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
1974       switch (e->Iex.Binop.op) {
1975          case Iop_CmpEQ16: case Iop_CasCmpEQ16:
1976             return Xcc_Z;
1977          case Iop_CmpNE16: case Iop_CasCmpNE16: case Iop_ExpCmpNE16:
1978             return Xcc_NZ;
1979          default:
1980             vpanic("iselCondCode(x86): CmpXX16");
1981       }
1982    }
1983 
1984    /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation).
1985       Saves a "movl %eax, %tmp" compared to the default route. */
1986    if (e->tag == Iex_Binop
1987        && e->Iex.Binop.op == Iop_CmpNE32
1988        && e->Iex.Binop.arg1->tag == Iex_CCall
1989        && e->Iex.Binop.arg2->tag == Iex_Const) {
1990       IRExpr* cal = e->Iex.Binop.arg1;
1991       IRExpr* con = e->Iex.Binop.arg2;
1992       /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
1993       vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
1994       vassert(con->Iex.Const.con->tag == Ico_U32);
1995       /* Marshal args, do the call. */
1996       UInt   addToSp = 0;
1997       RetLoc rloc    = mk_RetLoc_INVALID();
1998       doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1999                     cal->Iex.CCall.cee,
2000                     cal->Iex.CCall.retty, cal->Iex.CCall.args );
2001       vassert(is_sane_RetLoc(rloc));
2002       vassert(rloc.pri == RLPri_Int);
2003       vassert(addToSp == 0);
2004       /* */
2005       addInstr(env, X86Instr_Alu32R(Xalu_CMP,
2006                                     X86RMI_Imm(con->Iex.Const.con->Ico.U32),
2007                                     hregX86_EAX()));
2008       return Xcc_NZ;
2009    }
2010 
2011    /* Cmp*32*(x,y) */
2012    if (e->tag == Iex_Binop
2013        && (e->Iex.Binop.op == Iop_CmpEQ32
2014            || e->Iex.Binop.op == Iop_CmpNE32
2015            || e->Iex.Binop.op == Iop_CmpLT32S
2016            || e->Iex.Binop.op == Iop_CmpLT32U
2017            || e->Iex.Binop.op == Iop_CmpLE32S
2018            || e->Iex.Binop.op == Iop_CmpLE32U
2019            || e->Iex.Binop.op == Iop_CasCmpEQ32
2020            || e->Iex.Binop.op == Iop_CasCmpNE32
2021            || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
2022       HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
2023       X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2024       addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
2025       switch (e->Iex.Binop.op) {
2026          case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
2027          case Iop_CmpNE32:
2028          case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Xcc_NZ;
2029          case Iop_CmpLT32S: return Xcc_L;
2030          case Iop_CmpLT32U: return Xcc_B;
2031          case Iop_CmpLE32S: return Xcc_LE;
2032          case Iop_CmpLE32U: return Xcc_BE;
2033          default: vpanic("iselCondCode(x86): CmpXX32");
2034       }
2035    }
2036 
2037    /* CmpNE64 */
2038    if (e->tag == Iex_Binop
2039        && (e->Iex.Binop.op == Iop_CmpNE64
2040            || e->Iex.Binop.op == Iop_CmpEQ64)) {
2041       HReg hi1, hi2, lo1, lo2;
2042       HReg tHi = newVRegI(env);
2043       HReg tLo = newVRegI(env);
2044       iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
2045       iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
2046       addInstr(env, mk_iMOVsd_RR(hi1, tHi));
2047       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
2048       addInstr(env, mk_iMOVsd_RR(lo1, tLo));
2049       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
2050       addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
2051       switch (e->Iex.Binop.op) {
2052          case Iop_CmpNE64: return Xcc_NZ;
2053          case Iop_CmpEQ64: return Xcc_Z;
2054          default: vpanic("iselCondCode(x86): CmpXX64");
2055       }
2056    }
2057 
2058    ppIRExpr(e);
2059    vpanic("iselCondCode");
2060 }
2061 
2062 
2063 /*---------------------------------------------------------*/
2064 /*--- ISEL: Integer expressions (64 bit)                ---*/
2065 /*---------------------------------------------------------*/
2066 
2067 /* Compute a 64-bit value into a register pair, which is returned as
2068    the first two parameters.  As with iselIntExpr_R, these may be
2069    either real or virtual regs; in any case they must not be changed
2070    by subsequent code emitted by the caller.  */
2071 
iselInt64Expr(HReg * rHi,HReg * rLo,ISelEnv * env,const IRExpr * e)2072 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
2073                             const IRExpr* e )
2074 {
2075    iselInt64Expr_wrk(rHi, rLo, env, e);
2076 #  if 0
2077    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2078 #  endif
2079    vassert(hregClass(*rHi) == HRcInt32);
2080    vassert(hregIsVirtual(*rHi));
2081    vassert(hregClass(*rLo) == HRcInt32);
2082    vassert(hregIsVirtual(*rLo));
2083 }
2084 
2085 /* DO NOT CALL THIS DIRECTLY ! */
iselInt64Expr_wrk(HReg * rHi,HReg * rLo,ISelEnv * env,const IRExpr * e)2086 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
2087                                 const IRExpr* e )
2088 {
2089    MatchInfo mi;
2090    HWord fn = 0; /* helper fn for most SIMD64 stuff */
2091    vassert(e);
2092    vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
2093 
2094    /* 64-bit literal */
2095    if (e->tag == Iex_Const) {
2096       ULong w64 = e->Iex.Const.con->Ico.U64;
2097       UInt  wHi = toUInt(w64 >> 32);
2098       UInt  wLo = toUInt(w64);
2099       HReg  tLo = newVRegI(env);
2100       HReg  tHi = newVRegI(env);
2101       vassert(e->Iex.Const.con->tag == Ico_U64);
2102       if (wLo == wHi) {
2103          /* Save a precious Int register in this special case. */
2104          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2105          *rHi = tLo;
2106          *rLo = tLo;
2107       } else {
2108          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
2109          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2110          *rHi = tHi;
2111          *rLo = tLo;
2112       }
2113       return;
2114    }
2115 
2116    /* read 64-bit IRTemp */
2117    if (e->tag == Iex_RdTmp) {
2118       lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
2119       return;
2120    }
2121 
2122    /* 64-bit load */
2123    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2124       HReg     tLo, tHi;
2125       X86AMode *am0, *am4;
2126       vassert(e->Iex.Load.ty == Ity_I64);
2127       tLo = newVRegI(env);
2128       tHi = newVRegI(env);
2129       am0 = iselIntExpr_AMode(env, e->Iex.Load.addr);
2130       am4 = advance4(am0);
2131       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
2132       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2133       *rHi = tHi;
2134       *rLo = tLo;
2135       return;
2136    }
2137 
2138    /* 64-bit GET */
2139    if (e->tag == Iex_Get) {
2140       X86AMode* am  = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
2141       X86AMode* am4 = advance4(am);
2142       HReg tLo = newVRegI(env);
2143       HReg tHi = newVRegI(env);
2144       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2145       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2146       *rHi = tHi;
2147       *rLo = tLo;
2148       return;
2149    }
2150 
2151    /* 64-bit GETI */
2152    if (e->tag == Iex_GetI) {
2153       X86AMode* am
2154          = genGuestArrayOffset( env, e->Iex.GetI.descr,
2155                                      e->Iex.GetI.ix, e->Iex.GetI.bias );
2156       X86AMode* am4 = advance4(am);
2157       HReg tLo = newVRegI(env);
2158       HReg tHi = newVRegI(env);
2159       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2160       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2161       *rHi = tHi;
2162       *rLo = tLo;
2163       return;
2164    }
2165 
2166    /* 64-bit ITE: ITE(g, expr, expr) */ // VFD
2167    if (e->tag == Iex_ITE) {
2168       HReg e0Lo, e0Hi, e1Lo, e1Hi;
2169       HReg tLo = newVRegI(env);
2170       HReg tHi = newVRegI(env);
2171       iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse);
2172       iselInt64Expr(&e1Hi, &e1Lo, env, e->Iex.ITE.iftrue);
2173       addInstr(env, mk_iMOVsd_RR(e1Hi, tHi));
2174       addInstr(env, mk_iMOVsd_RR(e1Lo, tLo));
2175       X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
2176       /* This assumes the first cmov32 doesn't trash the condition
2177          codes, so they are still available for the second cmov32 */
2178       addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Hi), tHi));
2179       addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Lo), tLo));
2180       *rHi = tHi;
2181       *rLo = tLo;
2182       return;
2183    }
2184 
2185    /* --------- BINARY ops --------- */
2186    if (e->tag == Iex_Binop) {
2187       switch (e->Iex.Binop.op) {
2188          /* 32 x 32 -> 64 multiply */
2189          case Iop_MullU32:
2190          case Iop_MullS32: {
2191             /* get one operand into %eax, and the other into a R/M.
2192                Need to make an educated guess about which is better in
2193                which. */
2194             HReg   tLo    = newVRegI(env);
2195             HReg   tHi    = newVRegI(env);
2196             Bool   syned  = toBool(e->Iex.Binop.op == Iop_MullS32);
2197             X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2198             HReg   rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2199             addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX()));
2200             addInstr(env, X86Instr_MulL(syned, rmLeft));
2201             /* Result is now in EDX:EAX.  Tell the caller. */
2202             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2203             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2204             *rHi = tHi;
2205             *rLo = tLo;
2206             return;
2207          }
2208 
2209          /* 64 x 32 -> (32(rem),32(div)) division */
2210          case Iop_DivModU64to32:
2211          case Iop_DivModS64to32: {
2212             /* Get the 64-bit operand into edx:eax, and the other into
2213                any old R/M. */
2214             HReg sHi, sLo;
2215             HReg   tLo     = newVRegI(env);
2216             HReg   tHi     = newVRegI(env);
2217             Bool   syned   = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
2218             X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2219             iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2220             addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX()));
2221             addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX()));
2222             addInstr(env, X86Instr_Div(syned, rmRight));
2223             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2224             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2225             *rHi = tHi;
2226             *rLo = tLo;
2227             return;
2228          }
2229 
2230          /* Or64/And64/Xor64 */
2231          case Iop_Or64:
2232          case Iop_And64:
2233          case Iop_Xor64: {
2234             HReg xLo, xHi, yLo, yHi;
2235             HReg tLo = newVRegI(env);
2236             HReg tHi = newVRegI(env);
2237             X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
2238                           : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
2239                           : Xalu_XOR;
2240             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2241             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2242             addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2243             addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
2244             addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2245             addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
2246             *rHi = tHi;
2247             *rLo = tLo;
2248             return;
2249          }
2250 
2251          /* Add64/Sub64 */
2252          case Iop_Add64:
2253             if (e->Iex.Binop.arg2->tag == Iex_Const) {
2254                /* special case Add64(e, const) */
2255                ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2256                UInt  wHi = toUInt(w64 >> 32);
2257                UInt  wLo = toUInt(w64);
2258                HReg  tLo = newVRegI(env);
2259                HReg  tHi = newVRegI(env);
2260                HReg  xLo, xHi;
2261                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64);
2262                iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2263                addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2264                addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2265                addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo));
2266                addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi));
2267                *rHi = tHi;
2268                *rLo = tLo;
2269                return;
2270             }
2271             /* else fall through to the generic case */
2272          case Iop_Sub64: {
2273             HReg xLo, xHi, yLo, yHi;
2274             HReg tLo = newVRegI(env);
2275             HReg tHi = newVRegI(env);
2276             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2277             addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2278             addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2279             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2280             if (e->Iex.Binop.op==Iop_Add64) {
2281                addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
2282                addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
2283             } else {
2284                addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2285                addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2286             }
2287             *rHi = tHi;
2288             *rLo = tLo;
2289             return;
2290          }
2291 
2292          /* 32HLto64(e1,e2) */
2293          case Iop_32HLto64:
2294             *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2295             *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2296             return;
2297 
2298          /* 64-bit shifts */
2299          case Iop_Shl64: {
2300             /* We use the same ingenious scheme as gcc.  Put the value
2301                to be shifted into %hi:%lo, and the shift amount into
2302                %cl.  Then (dsts on right, a la ATT syntax):
2303 
2304                shldl %cl, %lo, %hi   -- make %hi be right for the
2305                                      -- shift amt %cl % 32
2306                shll  %cl, %lo        -- make %lo be right for the
2307                                      -- shift amt %cl % 32
2308 
2309                Now, if (shift amount % 64) is in the range 32 .. 63,
2310                we have to do a fixup, which puts the result low half
2311                into the result high half, and zeroes the low half:
2312 
2313                testl $32, %ecx
2314 
2315                cmovnz %lo, %hi
2316                movl $0, %tmp         -- sigh; need yet another reg
2317                cmovnz %tmp, %lo
2318             */
2319             HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2320             tLo = newVRegI(env);
2321             tHi = newVRegI(env);
2322             tTemp = newVRegI(env);
2323             rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2324             iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2325             addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2326             addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2327             addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2328             /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
2329                and those regs are legitimately modifiable. */
2330             addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
2331             addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
2332             addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2333             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
2334             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2335             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
2336             *rHi = tHi;
2337             *rLo = tLo;
2338             return;
2339          }
2340 
2341          case Iop_Shr64: {
2342             /* We use the same ingenious scheme as gcc.  Put the value
2343                to be shifted into %hi:%lo, and the shift amount into
2344                %cl.  Then:
2345 
2346                shrdl %cl, %hi, %lo   -- make %lo be right for the
2347                                      -- shift amt %cl % 32
2348                shrl  %cl, %hi        -- make %hi be right for the
2349                                      -- shift amt %cl % 32
2350 
2351                Now, if (shift amount % 64) is in the range 32 .. 63,
2352                we have to do a fixup, which puts the result high half
2353                into the result low half, and zeroes the high half:
2354 
2355                testl $32, %ecx
2356 
2357                cmovnz %hi, %lo
2358                movl $0, %tmp         -- sigh; need yet another reg
2359                cmovnz %tmp, %hi
2360             */
2361             HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2362             tLo = newVRegI(env);
2363             tHi = newVRegI(env);
2364             tTemp = newVRegI(env);
2365             rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2366             iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2367             addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2368             addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2369             addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2370             /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
2371                and those regs are legitimately modifiable. */
2372             addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
2373             addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
2374             addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2375             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
2376             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2377             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
2378             *rHi = tHi;
2379             *rLo = tLo;
2380             return;
2381          }
2382 
2383          /* F64 -> I64 */
2384          /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2385             case.  Unfortunately I see no easy way to avoid the
2386             duplication. */
2387          case Iop_F64toI64S: {
2388             HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
2389             HReg tLo = newVRegI(env);
2390             HReg tHi = newVRegI(env);
2391 
2392             /* Used several times ... */
2393             /* Careful ... this sharing is only safe because
2394 	       zero_esp/four_esp do not hold any registers which the
2395 	       register allocator could attempt to swizzle later. */
2396             X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2397             X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2398 
2399             /* rf now holds the value to be converted, and rrm holds
2400                the rounding mode value, encoded as per the
2401                IRRoundingMode enum.  The first thing to do is set the
2402                FPU's rounding mode accordingly. */
2403 
2404             /* Create a space for the format conversion. */
2405             /* subl $8, %esp */
2406             sub_from_esp(env, 8);
2407 
2408             /* Set host rounding mode */
2409             set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2410 
2411             /* gistll %rf, 0(%esp) */
2412             addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
2413 
2414             /* movl 0(%esp), %dstLo */
2415             /* movl 4(%esp), %dstHi */
2416             addInstr(env, X86Instr_Alu32R(
2417                              Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2418             addInstr(env, X86Instr_Alu32R(
2419                              Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2420 
2421             /* Restore default FPU rounding. */
2422             set_FPU_rounding_default( env );
2423 
2424             /* addl $8, %esp */
2425             add_to_esp(env, 8);
2426 
2427             *rHi = tHi;
2428             *rLo = tLo;
2429             return;
2430          }
2431 
2432          case Iop_Add8x8:
2433             fn = (HWord)h_generic_calc_Add8x8; goto binnish;
2434          case Iop_Add16x4:
2435             fn = (HWord)h_generic_calc_Add16x4; goto binnish;
2436          case Iop_Add32x2:
2437             fn = (HWord)h_generic_calc_Add32x2; goto binnish;
2438 
2439          case Iop_Avg8Ux8:
2440             fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
2441          case Iop_Avg16Ux4:
2442             fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;
2443 
2444          case Iop_CmpEQ8x8:
2445             fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
2446          case Iop_CmpEQ16x4:
2447             fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
2448          case Iop_CmpEQ32x2:
2449             fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;
2450 
2451          case Iop_CmpGT8Sx8:
2452             fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
2453          case Iop_CmpGT16Sx4:
2454             fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
2455          case Iop_CmpGT32Sx2:
2456             fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;
2457 
2458          case Iop_InterleaveHI8x8:
2459             fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
2460          case Iop_InterleaveLO8x8:
2461             fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
2462          case Iop_InterleaveHI16x4:
2463             fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
2464          case Iop_InterleaveLO16x4:
2465             fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
2466          case Iop_InterleaveHI32x2:
2467             fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
2468          case Iop_InterleaveLO32x2:
2469             fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
2470          case Iop_CatOddLanes16x4:
2471             fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish;
2472          case Iop_CatEvenLanes16x4:
2473             fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish;
2474          case Iop_Perm8x8:
2475             fn = (HWord)h_generic_calc_Perm8x8; goto binnish;
2476 
2477          case Iop_Max8Ux8:
2478             fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
2479          case Iop_Max16Sx4:
2480             fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
2481          case Iop_Min8Ux8:
2482             fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
2483          case Iop_Min16Sx4:
2484             fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;
2485 
2486          case Iop_Mul16x4:
2487             fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
2488          case Iop_Mul32x2:
2489             fn = (HWord)h_generic_calc_Mul32x2; goto binnish;
2490          case Iop_MulHi16Sx4:
2491             fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
2492          case Iop_MulHi16Ux4:
2493             fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;
2494 
2495          case Iop_QAdd8Sx8:
2496             fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
2497          case Iop_QAdd16Sx4:
2498             fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
2499          case Iop_QAdd8Ux8:
2500             fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
2501          case Iop_QAdd16Ux4:
2502             fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
2503 
2504          case Iop_QNarrowBin32Sto16Sx4:
2505             fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish;
2506          case Iop_QNarrowBin16Sto8Sx8:
2507             fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish;
2508          case Iop_QNarrowBin16Sto8Ux8:
2509             fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish;
2510          case Iop_NarrowBin16to8x8:
2511             fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish;
2512          case Iop_NarrowBin32to16x4:
2513             fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish;
2514 
2515          case Iop_QSub8Sx8:
2516             fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
2517          case Iop_QSub16Sx4:
2518             fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
2519          case Iop_QSub8Ux8:
2520             fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish;
2521          case Iop_QSub16Ux4:
2522             fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish;
2523 
2524          case Iop_Sub8x8:
2525             fn = (HWord)h_generic_calc_Sub8x8; goto binnish;
2526          case Iop_Sub16x4:
2527             fn = (HWord)h_generic_calc_Sub16x4; goto binnish;
2528          case Iop_Sub32x2:
2529             fn = (HWord)h_generic_calc_Sub32x2; goto binnish;
2530 
2531          binnish: {
2532             /* Note: the following assumes all helpers are of
2533                signature
2534                   ULong fn ( ULong, ULong ), and they are
2535                not marked as regparm functions.
2536             */
2537             HReg xLo, xHi, yLo, yHi;
2538             HReg tLo = newVRegI(env);
2539             HReg tHi = newVRegI(env);
2540             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2541             addInstr(env, X86Instr_Push(X86RMI_Reg(yHi)));
2542             addInstr(env, X86Instr_Push(X86RMI_Reg(yLo)));
2543             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2544             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2545             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2546             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2547                                          0, mk_RetLoc_simple(RLPri_2Int) ));
2548             add_to_esp(env, 4*4);
2549             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2550             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2551             *rHi = tHi;
2552             *rLo = tLo;
2553             return;
2554          }
2555 
2556          case Iop_ShlN32x2:
2557             fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
2558          case Iop_ShlN16x4:
2559             fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
2560          case Iop_ShlN8x8:
2561             fn = (HWord)h_generic_calc_ShlN8x8;  goto shifty;
2562          case Iop_ShrN32x2:
2563             fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
2564          case Iop_ShrN16x4:
2565             fn = (HWord)h_generic_calc_ShrN16x4; goto shifty;
2566          case Iop_SarN32x2:
2567             fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
2568          case Iop_SarN16x4:
2569             fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
2570          case Iop_SarN8x8:
2571             fn = (HWord)h_generic_calc_SarN8x8;  goto shifty;
2572          shifty: {
2573             /* Note: the following assumes all helpers are of
2574                signature
2575                   ULong fn ( ULong, UInt ), and they are
2576                not marked as regparm functions.
2577             */
2578             HReg xLo, xHi;
2579             HReg tLo = newVRegI(env);
2580             HReg tHi = newVRegI(env);
2581             X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2582             addInstr(env, X86Instr_Push(y));
2583             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2584             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2585             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2586             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2587                                          0, mk_RetLoc_simple(RLPri_2Int) ));
2588             add_to_esp(env, 3*4);
2589             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2590             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2591             *rHi = tHi;
2592             *rLo = tLo;
2593             return;
2594          }
2595 
2596          default:
2597             break;
2598       }
2599    } /* if (e->tag == Iex_Binop) */
2600 
2601 
2602    /* --------- UNARY ops --------- */
2603    if (e->tag == Iex_Unop) {
2604       switch (e->Iex.Unop.op) {
2605 
2606          /* 32Sto64(e) */
2607          case Iop_32Sto64: {
2608             HReg tLo = newVRegI(env);
2609             HReg tHi = newVRegI(env);
2610             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2611             addInstr(env, mk_iMOVsd_RR(src,tHi));
2612             addInstr(env, mk_iMOVsd_RR(src,tLo));
2613             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi));
2614             *rHi = tHi;
2615             *rLo = tLo;
2616             return;
2617          }
2618 
2619          /* 32Uto64(e) */
2620          case Iop_32Uto64: {
2621             HReg tLo = newVRegI(env);
2622             HReg tHi = newVRegI(env);
2623             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2624             addInstr(env, mk_iMOVsd_RR(src,tLo));
2625             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2626             *rHi = tHi;
2627             *rLo = tLo;
2628             return;
2629          }
2630 
2631          /* 16Uto64(e) */
2632          case Iop_16Uto64: {
2633             HReg tLo = newVRegI(env);
2634             HReg tHi = newVRegI(env);
2635             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2636             addInstr(env, mk_iMOVsd_RR(src,tLo));
2637             addInstr(env, X86Instr_Alu32R(Xalu_AND,
2638                                           X86RMI_Imm(0xFFFF), tLo));
2639             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2640             *rHi = tHi;
2641             *rLo = tLo;
2642             return;
2643          }
2644 
2645          /* V128{HI}to64 */
2646          case Iop_V128HIto64:
2647          case Iop_V128to64: {
2648             Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
2649             HReg tLo = newVRegI(env);
2650             HReg tHi = newVRegI(env);
2651             HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
2652             X86AMode* esp0  = X86AMode_IR(0,     hregX86_ESP());
2653             X86AMode* espLO = X86AMode_IR(off,   hregX86_ESP());
2654             X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP());
2655             sub_from_esp(env, 16);
2656             addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
2657             addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2658                                            X86RMI_Mem(espLO), tLo ));
2659             addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2660                                            X86RMI_Mem(espHI), tHi ));
2661             add_to_esp(env, 16);
2662             *rHi = tHi;
2663             *rLo = tLo;
2664             return;
2665          }
2666 
2667          /* could do better than this, but for now ... */
2668          case Iop_1Sto64: {
2669             HReg tLo = newVRegI(env);
2670             HReg tHi = newVRegI(env);
2671             X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2672             addInstr(env, X86Instr_Set32(cond,tLo));
2673             addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo));
2674             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo));
2675             addInstr(env, mk_iMOVsd_RR(tLo, tHi));
2676             *rHi = tHi;
2677             *rLo = tLo;
2678             return;
2679          }
2680 
2681          /* Not64(e) */
2682          case Iop_Not64: {
2683             HReg tLo = newVRegI(env);
2684             HReg tHi = newVRegI(env);
2685             HReg sHi, sLo;
2686             iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
2687             addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2688             addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2689             addInstr(env, X86Instr_Unary32(Xun_NOT,tHi));
2690             addInstr(env, X86Instr_Unary32(Xun_NOT,tLo));
2691             *rHi = tHi;
2692             *rLo = tLo;
2693             return;
2694          }
2695 
2696          /* Left64(e) */
2697          case Iop_Left64: {
2698             HReg yLo, yHi;
2699             HReg tLo = newVRegI(env);
2700             HReg tHi = newVRegI(env);
2701             /* yHi:yLo = arg */
2702             iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2703             /* tLo = 0 - yLo, and set carry */
2704             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo));
2705             addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2706             /* tHi = 0 - yHi - carry */
2707             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2708             addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2709             /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
2710                back in, so as to give the final result
2711                tHi:tLo = arg | -arg. */
2712             addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo));
2713             addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi));
2714             *rHi = tHi;
2715             *rLo = tLo;
2716             return;
2717          }
2718 
2719          /* --- patterns rooted at: CmpwNEZ64 --- */
2720 
2721          /* CmpwNEZ64(e) */
2722          case Iop_CmpwNEZ64: {
2723 
2724          DECLARE_PATTERN(p_CmpwNEZ64_Or64);
2725          DEFINE_PATTERN(p_CmpwNEZ64_Or64,
2726                         unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1))));
2727          if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) {
2728             /* CmpwNEZ64(Or64(x,y)) */
2729             HReg xHi,xLo,yHi,yLo;
2730             HReg xBoth = newVRegI(env);
2731             HReg merged = newVRegI(env);
2732             HReg tmp2 = newVRegI(env);
2733 
2734             iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]);
2735             addInstr(env, mk_iMOVsd_RR(xHi,xBoth));
2736             addInstr(env, X86Instr_Alu32R(Xalu_OR,
2737                                           X86RMI_Reg(xLo),xBoth));
2738 
2739             iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]);
2740             addInstr(env, mk_iMOVsd_RR(yHi,merged));
2741             addInstr(env, X86Instr_Alu32R(Xalu_OR,
2742                                           X86RMI_Reg(yLo),merged));
2743             addInstr(env, X86Instr_Alu32R(Xalu_OR,
2744                                              X86RMI_Reg(xBoth),merged));
2745 
2746             /* tmp2 = (merged | -merged) >>s 31 */
2747             addInstr(env, mk_iMOVsd_RR(merged,tmp2));
2748             addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2749             addInstr(env, X86Instr_Alu32R(Xalu_OR,
2750                                           X86RMI_Reg(merged), tmp2));
2751             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2752             *rHi = tmp2;
2753             *rLo = tmp2;
2754             return;
2755          } else {
2756             /* CmpwNEZ64(e) */
2757             HReg srcLo, srcHi;
2758             HReg tmp1  = newVRegI(env);
2759             HReg tmp2  = newVRegI(env);
2760             /* srcHi:srcLo = arg */
2761             iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2762             /* tmp1 = srcHi | srcLo */
2763             addInstr(env, mk_iMOVsd_RR(srcHi,tmp1));
2764             addInstr(env, X86Instr_Alu32R(Xalu_OR,
2765                                           X86RMI_Reg(srcLo), tmp1));
2766             /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2767             addInstr(env, mk_iMOVsd_RR(tmp1,tmp2));
2768             addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2769             addInstr(env, X86Instr_Alu32R(Xalu_OR,
2770                                           X86RMI_Reg(tmp1), tmp2));
2771             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2772             *rHi = tmp2;
2773             *rLo = tmp2;
2774             return;
2775          }
2776          }
2777 
2778          /* ReinterpF64asI64(e) */
2779          /* Given an IEEE754 double, produce an I64 with the same bit
2780             pattern. */
2781          case Iop_ReinterpF64asI64: {
2782             HReg rf   = iselDblExpr(env, e->Iex.Unop.arg);
2783             HReg tLo  = newVRegI(env);
2784             HReg tHi  = newVRegI(env);
2785             X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2786             X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2787             /* paranoia */
2788             set_FPU_rounding_default(env);
2789             /* subl $8, %esp */
2790             sub_from_esp(env, 8);
2791             /* gstD %rf, 0(%esp) */
2792             addInstr(env,
2793                      X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp));
2794             /* movl 0(%esp), %tLo */
2795             addInstr(env,
2796                      X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2797             /* movl 4(%esp), %tHi */
2798             addInstr(env,
2799                      X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2800             /* addl $8, %esp */
2801             add_to_esp(env, 8);
2802             *rHi = tHi;
2803             *rLo = tLo;
2804             return;
2805          }
2806 
2807          case Iop_CmpNEZ32x2:
2808             fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish;
2809          case Iop_CmpNEZ16x4:
2810             fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish;
2811          case Iop_CmpNEZ8x8:
2812             fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish;
2813          unish: {
2814             /* Note: the following assumes all helpers are of
2815                signature
2816                   ULong fn ( ULong ), and they are
2817                not marked as regparm functions.
2818             */
2819             HReg xLo, xHi;
2820             HReg tLo = newVRegI(env);
2821             HReg tHi = newVRegI(env);
2822             iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
2823             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2824             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2825             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
2826                                          0, mk_RetLoc_simple(RLPri_2Int) ));
2827             add_to_esp(env, 2*4);
2828             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2829             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2830             *rHi = tHi;
2831             *rLo = tLo;
2832             return;
2833          }
2834 
2835          default:
2836             break;
2837       }
2838    } /* if (e->tag == Iex_Unop) */
2839 
2840 
2841    /* --------- CCALL --------- */
2842    if (e->tag == Iex_CCall) {
2843       HReg tLo = newVRegI(env);
2844       HReg tHi = newVRegI(env);
2845 
2846       /* Marshal args, do the call, clear stack. */
2847       UInt   addToSp = 0;
2848       RetLoc rloc    = mk_RetLoc_INVALID();
2849       doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2850                     e->Iex.CCall.cee,
2851                     e->Iex.CCall.retty, e->Iex.CCall.args );
2852       vassert(is_sane_RetLoc(rloc));
2853       vassert(rloc.pri == RLPri_2Int);
2854       vassert(addToSp == 0);
2855       /* */
2856 
2857       addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2858       addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2859       *rHi = tHi;
2860       *rLo = tLo;
2861       return;
2862    }
2863 
2864    ppIRExpr(e);
2865    vpanic("iselInt64Expr");
2866 }
2867 
2868 
2869 /*---------------------------------------------------------*/
2870 /*--- ISEL: Floating point expressions (32 bit)         ---*/
2871 /*---------------------------------------------------------*/
2872 
2873 /* Nothing interesting here; really just wrappers for
2874    64-bit stuff. */
2875 
iselFltExpr(ISelEnv * env,const IRExpr * e)2876 static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e )
2877 {
2878    HReg r = iselFltExpr_wrk( env, e );
2879 #  if 0
2880    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2881 #  endif
2882    vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
2883    vassert(hregIsVirtual(r));
2884    return r;
2885 }
2886 
2887 /* DO NOT CALL THIS DIRECTLY */
iselFltExpr_wrk(ISelEnv * env,const IRExpr * e)2888 static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e )
2889 {
2890    IRType ty = typeOfIRExpr(env->type_env,e);
2891    vassert(ty == Ity_F32);
2892 
2893    if (e->tag == Iex_RdTmp) {
2894       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2895    }
2896 
2897    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2898       X86AMode* am;
2899       HReg res = newVRegF(env);
2900       vassert(e->Iex.Load.ty == Ity_F32);
2901       am = iselIntExpr_AMode(env, e->Iex.Load.addr);
2902       addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am));
2903       return res;
2904    }
2905 
2906    if (e->tag == Iex_Binop
2907        && e->Iex.Binop.op == Iop_F64toF32) {
2908       /* Although the result is still held in a standard FPU register,
2909          we need to round it to reflect the loss of accuracy/range
2910          entailed in casting it to a 32-bit float. */
2911       HReg dst = newVRegF(env);
2912       HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2913       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2914       addInstr(env, X86Instr_Fp64to32(src,dst));
2915       set_FPU_rounding_default( env );
2916       return dst;
2917    }
2918 
2919    if (e->tag == Iex_Get) {
2920       X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
2921                                   hregX86_EBP() );
2922       HReg res = newVRegF(env);
2923       addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am ));
2924       return res;
2925    }
2926 
2927    if (e->tag == Iex_Unop
2928        && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
2929        /* Given an I32, produce an IEEE754 float with the same bit
2930           pattern. */
2931       HReg    dst = newVRegF(env);
2932       X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
2933       /* paranoia */
2934       addInstr(env, X86Instr_Push(rmi));
2935       addInstr(env, X86Instr_FpLdSt(
2936                        True/*load*/, 4, dst,
2937                        X86AMode_IR(0, hregX86_ESP())));
2938       add_to_esp(env, 4);
2939       return dst;
2940    }
2941 
2942    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
2943       HReg rf  = iselFltExpr(env, e->Iex.Binop.arg2);
2944       HReg dst = newVRegF(env);
2945 
2946       /* rf now holds the value to be rounded.  The first thing to do
2947          is set the FPU's rounding mode accordingly. */
2948 
2949       /* Set host rounding mode */
2950       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2951 
2952       /* grndint %rf, %dst */
2953       addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
2954 
2955       /* Restore default FPU rounding. */
2956       set_FPU_rounding_default( env );
2957 
2958       return dst;
2959    }
2960 
2961    ppIRExpr(e);
2962    vpanic("iselFltExpr_wrk");
2963 }
2964 
2965 
2966 /*---------------------------------------------------------*/
2967 /*--- ISEL: Floating point expressions (64 bit)         ---*/
2968 /*---------------------------------------------------------*/
2969 
2970 /* Compute a 64-bit floating point value into a register, the identity
2971    of which is returned.  As with iselIntExpr_R, the reg may be either
2972    real or virtual; in any case it must not be changed by subsequent
2973    code emitted by the caller.  */
2974 
2975 /* IEEE 754 formats.  From http://www.freesoft.org/CIE/RFC/1832/32.htm:
2976 
2977     Type                  S (1 bit)   E (11 bits)   F (52 bits)
2978     ----                  ---------   -----------   -----------
2979     signalling NaN        u           2047 (max)    .0uuuuu---u
2980                                                     (with at least
2981                                                      one 1 bit)
2982     quiet NaN             u           2047 (max)    .1uuuuu---u
2983 
2984     negative infinity     1           2047 (max)    .000000---0
2985 
2986     positive infinity     0           2047 (max)    .000000---0
2987 
2988     negative zero         1           0             .000000---0
2989 
2990     positive zero         0           0             .000000---0
2991 */
2992 
iselDblExpr(ISelEnv * env,const IRExpr * e)2993 static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e )
2994 {
2995    HReg r = iselDblExpr_wrk( env, e );
2996 #  if 0
2997    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2998 #  endif
2999    vassert(hregClass(r) == HRcFlt64);
3000    vassert(hregIsVirtual(r));
3001    return r;
3002 }
3003 
3004 /* DO NOT CALL THIS DIRECTLY */
iselDblExpr_wrk(ISelEnv * env,const IRExpr * e)3005 static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e )
3006 {
3007    IRType ty = typeOfIRExpr(env->type_env,e);
3008    vassert(e);
3009    vassert(ty == Ity_F64);
3010 
3011    if (e->tag == Iex_RdTmp) {
3012       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3013    }
3014 
3015    if (e->tag == Iex_Const) {
3016       union { UInt u32x2[2]; ULong u64; Double f64; } u;
3017       HReg freg = newVRegF(env);
3018       vassert(sizeof(u) == 8);
3019       vassert(sizeof(u.u64) == 8);
3020       vassert(sizeof(u.f64) == 8);
3021       vassert(sizeof(u.u32x2) == 8);
3022 
3023       if (e->Iex.Const.con->tag == Ico_F64) {
3024          u.f64 = e->Iex.Const.con->Ico.F64;
3025       }
3026       else if (e->Iex.Const.con->tag == Ico_F64i) {
3027          u.u64 = e->Iex.Const.con->Ico.F64i;
3028       }
3029       else
3030          vpanic("iselDblExpr(x86): const");
3031 
3032       addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1])));
3033       addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0])));
3034       addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg,
3035                                     X86AMode_IR(0, hregX86_ESP())));
3036       add_to_esp(env, 8);
3037       return freg;
3038    }
3039 
3040    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3041       X86AMode* am;
3042       HReg res = newVRegF(env);
3043       vassert(e->Iex.Load.ty == Ity_F64);
3044       am = iselIntExpr_AMode(env, e->Iex.Load.addr);
3045       addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am));
3046       return res;
3047    }
3048 
3049    if (e->tag == Iex_Get) {
3050       X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
3051                                   hregX86_EBP() );
3052       HReg res = newVRegF(env);
3053       addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
3054       return res;
3055    }
3056 
3057    if (e->tag == Iex_GetI) {
3058       X86AMode* am
3059          = genGuestArrayOffset(
3060               env, e->Iex.GetI.descr,
3061                    e->Iex.GetI.ix, e->Iex.GetI.bias );
3062       HReg res = newVRegF(env);
3063       addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
3064       return res;
3065    }
3066 
3067    if (e->tag == Iex_Triop) {
3068       X86FpOp fpop = Xfp_INVALID;
3069       IRTriop *triop = e->Iex.Triop.details;
3070       switch (triop->op) {
3071          case Iop_AddF64:    fpop = Xfp_ADD; break;
3072          case Iop_SubF64:    fpop = Xfp_SUB; break;
3073          case Iop_MulF64:    fpop = Xfp_MUL; break;
3074          case Iop_DivF64:    fpop = Xfp_DIV; break;
3075          case Iop_ScaleF64:  fpop = Xfp_SCALE; break;
3076          case Iop_Yl2xF64:   fpop = Xfp_YL2X; break;
3077          case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
3078          case Iop_AtanF64:   fpop = Xfp_ATAN; break;
3079          case Iop_PRemF64:   fpop = Xfp_PREM; break;
3080          case Iop_PRem1F64:  fpop = Xfp_PREM1; break;
3081          default: break;
3082       }
3083       if (fpop != Xfp_INVALID) {
3084          HReg res  = newVRegF(env);
3085          HReg srcL = iselDblExpr(env, triop->arg2);
3086          HReg srcR = iselDblExpr(env, triop->arg3);
3087          /* XXXROUNDINGFIXME */
3088          /* set roundingmode here */
3089          addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
3090 	 if (fpop != Xfp_ADD && fpop != Xfp_SUB
3091 	     && fpop != Xfp_MUL && fpop != Xfp_DIV)
3092             roundToF64(env, res);
3093          return res;
3094       }
3095    }
3096 
3097    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
3098       HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
3099       HReg dst = newVRegF(env);
3100 
3101       /* rf now holds the value to be rounded.  The first thing to do
3102          is set the FPU's rounding mode accordingly. */
3103 
3104       /* Set host rounding mode */
3105       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3106 
3107       /* grndint %rf, %dst */
3108       addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
3109 
3110       /* Restore default FPU rounding. */
3111       set_FPU_rounding_default( env );
3112 
3113       return dst;
3114    }
3115 
3116    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
3117       HReg dst = newVRegF(env);
3118       HReg rHi,rLo;
3119       iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2);
3120       addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3121       addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3122 
3123       /* Set host rounding mode */
3124       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3125 
3126       addInstr(env, X86Instr_FpLdStI(
3127                        True/*load*/, 8, dst,
3128                        X86AMode_IR(0, hregX86_ESP())));
3129 
3130       /* Restore default FPU rounding. */
3131       set_FPU_rounding_default( env );
3132 
3133       add_to_esp(env, 8);
3134       return dst;
3135    }
3136 
3137    if (e->tag == Iex_Binop) {
3138       X86FpOp fpop = Xfp_INVALID;
3139       switch (e->Iex.Binop.op) {
3140          case Iop_SinF64:  fpop = Xfp_SIN; break;
3141          case Iop_CosF64:  fpop = Xfp_COS; break;
3142          case Iop_TanF64:  fpop = Xfp_TAN; break;
3143          case Iop_2xm1F64: fpop = Xfp_2XM1; break;
3144          case Iop_SqrtF64: fpop = Xfp_SQRT; break;
3145          default: break;
3146       }
3147       if (fpop != Xfp_INVALID) {
3148          HReg res = newVRegF(env);
3149          HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3150          /* XXXROUNDINGFIXME */
3151          /* set roundingmode here */
3152          /* Note that X86Instr_FpUnary(Xfp_TAN,..) sets the condition
3153             codes.  I don't think that matters, since this insn
3154             selector never generates such an instruction intervening
3155             between an flag-setting instruction and a flag-using
3156             instruction. */
3157          addInstr(env, X86Instr_FpUnary(fpop,src,res));
3158 	 if (fpop != Xfp_SQRT
3159              && fpop != Xfp_NEG && fpop != Xfp_ABS)
3160             roundToF64(env, res);
3161          return res;
3162       }
3163    }
3164 
3165    if (e->tag == Iex_Unop) {
3166       X86FpOp fpop = Xfp_INVALID;
3167       switch (e->Iex.Unop.op) {
3168          case Iop_NegF64:  fpop = Xfp_NEG; break;
3169          case Iop_AbsF64:  fpop = Xfp_ABS; break;
3170          default: break;
3171       }
3172       if (fpop != Xfp_INVALID) {
3173          HReg res = newVRegF(env);
3174          HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3175          addInstr(env, X86Instr_FpUnary(fpop,src,res));
3176          /* No need to do roundToF64(env,res) for Xfp_NEG or Xfp_ABS,
3177             but might need to do that for other unary ops. */
3178          return res;
3179       }
3180    }
3181 
3182    if (e->tag == Iex_Unop) {
3183       switch (e->Iex.Unop.op) {
3184          case Iop_I32StoF64: {
3185             HReg dst = newVRegF(env);
3186             HReg ri  = iselIntExpr_R(env, e->Iex.Unop.arg);
3187             addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
3188             set_FPU_rounding_default(env);
3189             addInstr(env, X86Instr_FpLdStI(
3190                              True/*load*/, 4, dst,
3191                              X86AMode_IR(0, hregX86_ESP())));
3192 	    add_to_esp(env, 4);
3193             return dst;
3194          }
3195          case Iop_ReinterpI64asF64: {
3196             /* Given an I64, produce an IEEE754 double with the same
3197                bit pattern. */
3198             HReg dst = newVRegF(env);
3199             HReg rHi, rLo;
3200 	    iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg);
3201             /* paranoia */
3202             set_FPU_rounding_default(env);
3203             addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3204             addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3205             addInstr(env, X86Instr_FpLdSt(
3206                              True/*load*/, 8, dst,
3207                              X86AMode_IR(0, hregX86_ESP())));
3208 	    add_to_esp(env, 8);
3209             return dst;
3210 	 }
3211          case Iop_F32toF64: {
3212             /* this is a no-op */
3213             HReg res = iselFltExpr(env, e->Iex.Unop.arg);
3214             return res;
3215 	 }
3216          default:
3217             break;
3218       }
3219    }
3220 
3221    /* --------- MULTIPLEX --------- */
3222    if (e->tag == Iex_ITE) { // VFD
3223      if (ty == Ity_F64
3224          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
3225         HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
3226         HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
3227         HReg dst = newVRegF(env);
3228         addInstr(env, X86Instr_FpUnary(Xfp_MOV,r1,dst));
3229         X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
3230         addInstr(env, X86Instr_FpCMov(cc ^ 1, r0, dst));
3231         return dst;
3232       }
3233    }
3234 
3235    ppIRExpr(e);
3236    vpanic("iselDblExpr_wrk");
3237 }
3238 
3239 
3240 /*---------------------------------------------------------*/
3241 /*--- ISEL: SIMD (Vector) expressions, 128 bit.         ---*/
3242 /*---------------------------------------------------------*/
3243 
iselVecExpr(ISelEnv * env,const IRExpr * e)3244 static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e )
3245 {
3246    HReg r = iselVecExpr_wrk( env, e );
3247 #  if 0
3248    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3249 #  endif
3250    vassert(hregClass(r) == HRcVec128);
3251    vassert(hregIsVirtual(r));
3252    return r;
3253 }
3254 
3255 
3256 /* DO NOT CALL THIS DIRECTLY */
iselVecExpr_wrk(ISelEnv * env,const IRExpr * e)3257 static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e )
3258 {
3259 
3260 #  define REQUIRE_SSE1                                    \
3261       do { if (env->hwcaps == 0/*baseline, no sse*/       \
3262                ||  env->hwcaps == VEX_HWCAPS_X86_MMXEXT /*Integer SSE*/) \
3263               goto vec_fail;                              \
3264       } while (0)
3265 
3266 #  define REQUIRE_SSE2                                    \
3267       do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2))  \
3268               goto vec_fail;                              \
3269       } while (0)
3270 
3271 #  define SSE2_OR_ABOVE                                   \
3272        (env->hwcaps & VEX_HWCAPS_X86_SSE2)
3273 
3274    HWord     fn = 0; /* address of helper fn, if required */
3275    MatchInfo mi;
3276    Bool      arg1isEReg = False;
3277    X86SseOp  op = Xsse_INVALID;
3278    IRType    ty = typeOfIRExpr(env->type_env,e);
3279    vassert(e);
3280    vassert(ty == Ity_V128);
3281 
3282    REQUIRE_SSE1;
3283 
3284    if (e->tag == Iex_RdTmp) {
3285       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3286    }
3287 
3288    if (e->tag == Iex_Get) {
3289       HReg dst = newVRegV(env);
3290       addInstr(env, X86Instr_SseLdSt(
3291                        True/*load*/,
3292                        dst,
3293                        X86AMode_IR(e->Iex.Get.offset, hregX86_EBP())
3294                     )
3295               );
3296       return dst;
3297    }
3298 
3299    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3300       HReg      dst = newVRegV(env);
3301       X86AMode* am  = iselIntExpr_AMode(env, e->Iex.Load.addr);
3302       addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
3303       return dst;
3304    }
3305 
3306    if (e->tag == Iex_Const) {
3307       HReg dst = newVRegV(env);
3308       vassert(e->Iex.Const.con->tag == Ico_V128);
3309       addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
3310       return dst;
3311    }
3312 
3313    if (e->tag == Iex_Unop) {
3314 
3315    if (SSE2_OR_ABOVE) {
3316       /* 64UtoV128(LDle:I64(addr)) */
3317       DECLARE_PATTERN(p_zwiden_load64);
3318       DEFINE_PATTERN(p_zwiden_load64,
3319                      unop(Iop_64UtoV128,
3320                           IRExpr_Load(Iend_LE,Ity_I64,bind(0))));
3321       if (matchIRExpr(&mi, p_zwiden_load64, e)) {
3322          X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]);
3323          HReg dst = newVRegV(env);
3324          addInstr(env, X86Instr_SseLdzLO(8, dst, am));
3325          return dst;
3326       }
3327    }
3328 
3329    switch (e->Iex.Unop.op) {
3330 
3331       case Iop_NotV128: {
3332          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3333          return do_sse_Not128(env, arg);
3334       }
3335 
3336       case Iop_CmpNEZ64x2: {
3337          /* We can use SSE2 instructions for this. */
3338          /* Ideally, we want to do a 64Ix2 comparison against zero of
3339             the operand.  Problem is no such insn exists.  Solution
3340             therefore is to do a 32Ix4 comparison instead, and bitwise-
3341             negate (NOT) the result.  Let a,b,c,d be 32-bit lanes, and
3342             let the not'd result of this initial comparison be a:b:c:d.
3343             What we need to compute is (a|b):(a|b):(c|d):(c|d).  So, use
3344             pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3345             giving the required result.
3346 
3347             The required selection sequence is 2,3,0,1, which
3348             according to Intel's documentation means the pshufd
3349             literal value is 0xB1, that is,
3350             (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3351          */
3352          HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
3353          HReg tmp  = newVRegV(env);
3354          HReg dst  = newVRegV(env);
3355          REQUIRE_SSE2;
3356          addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp));
3357          addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp));
3358          tmp = do_sse_Not128(env, tmp);
3359          addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst));
3360          addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
3361          return dst;
3362       }
3363 
3364       case Iop_CmpNEZ32x4: {
3365          /* Sigh, we have to generate lousy code since this has to
3366             work on SSE1 hosts */
3367          /* basically, the idea is: for each lane:
3368                movl lane, %r ; negl %r   (now CF = lane==0 ? 0 : 1)
3369                sbbl %r, %r               (now %r = 1Sto32(CF))
3370                movl %r, lane
3371          */
3372          Int       i;
3373          X86AMode* am;
3374          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3375          HReg      arg  = iselVecExpr(env, e->Iex.Unop.arg);
3376          HReg      dst  = newVRegV(env);
3377          HReg      r32  = newVRegI(env);
3378          sub_from_esp(env, 16);
3379          addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
3380          for (i = 0; i < 4; i++) {
3381             am = X86AMode_IR(i*4, hregX86_ESP());
3382             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
3383             addInstr(env, X86Instr_Unary32(Xun_NEG, r32));
3384             addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
3385             addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
3386          }
3387          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3388          add_to_esp(env, 16);
3389          return dst;
3390       }
3391 
3392       case Iop_CmpNEZ8x16:
3393       case Iop_CmpNEZ16x8: {
3394          /* We can use SSE2 instructions for this. */
3395          HReg arg;
3396          HReg vec0 = newVRegV(env);
3397          HReg vec1 = newVRegV(env);
3398          HReg dst  = newVRegV(env);
3399          X86SseOp cmpOp
3400             = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16
3401                                              : Xsse_CMPEQ8;
3402          REQUIRE_SSE2;
3403          addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0));
3404          addInstr(env, mk_vMOVsd_RR(vec0, vec1));
3405          addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1));
3406          /* defer arg computation to here so as to give CMPEQF as long
3407             as possible to complete */
3408          arg = iselVecExpr(env, e->Iex.Unop.arg);
3409          /* vec0 is all 0s; vec1 is all 1s */
3410          addInstr(env, mk_vMOVsd_RR(arg, dst));
3411          /* 16x8 or 8x16 comparison == */
3412          addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst));
3413          /* invert result */
3414          addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
3415          return dst;
3416       }
3417 
3418       case Iop_RecipEst32Fx4: op = Xsse_RCPF;   goto do_32Fx4_unary;
3419       case Iop_RSqrtEst32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
3420       do_32Fx4_unary:
3421       {
3422          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3423          HReg dst = newVRegV(env);
3424          addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
3425          return dst;
3426       }
3427 
3428       case Iop_RecipEst32F0x4: op = Xsse_RCPF;   goto do_32F0x4_unary;
3429       case Iop_RSqrtEst32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary;
3430       case Iop_Sqrt32F0x4:     op = Xsse_SQRTF;  goto do_32F0x4_unary;
3431       do_32F0x4_unary:
3432       {
3433          /* A bit subtle.  We have to copy the arg to the result
3434             register first, because actually doing the SSE scalar insn
3435             leaves the upper 3/4 of the destination register
3436             unchanged.  Whereas the required semantics of these
3437             primops is that the upper 3/4 is simply copied in from the
3438             argument. */
3439          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3440          HReg dst = newVRegV(env);
3441          addInstr(env, mk_vMOVsd_RR(arg, dst));
3442          addInstr(env, X86Instr_Sse32FLo(op, arg, dst));
3443          return dst;
3444       }
3445 
3446       case Iop_Sqrt64F0x2:  op = Xsse_SQRTF;  goto do_64F0x2_unary;
3447       do_64F0x2_unary:
3448       {
3449          /* A bit subtle.  We have to copy the arg to the result
3450             register first, because actually doing the SSE scalar insn
3451             leaves the upper half of the destination register
3452             unchanged.  Whereas the required semantics of these
3453             primops is that the upper half is simply copied in from the
3454             argument. */
3455          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3456          HReg dst = newVRegV(env);
3457          REQUIRE_SSE2;
3458          addInstr(env, mk_vMOVsd_RR(arg, dst));
3459          addInstr(env, X86Instr_Sse64FLo(op, arg, dst));
3460          return dst;
3461       }
3462 
3463       case Iop_32UtoV128: {
3464          HReg      dst  = newVRegV(env);
3465          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3466          X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3467          addInstr(env, X86Instr_Push(rmi));
3468 	 addInstr(env, X86Instr_SseLdzLO(4, dst, esp0));
3469          add_to_esp(env, 4);
3470          return dst;
3471       }
3472 
3473       case Iop_64UtoV128: {
3474          HReg      rHi, rLo;
3475          HReg      dst  = newVRegV(env);
3476          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3477          iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg);
3478          addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3479          addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3480 	 addInstr(env, X86Instr_SseLdzLO(8, dst, esp0));
3481          add_to_esp(env, 8);
3482          return dst;
3483       }
3484 
3485       default:
3486          break;
3487    } /* switch (e->Iex.Unop.op) */
3488    } /* if (e->tag == Iex_Unop) */
3489 
3490    if (e->tag == Iex_Binop) {
3491    switch (e->Iex.Binop.op) {
3492 
3493       case Iop_Sqrt64Fx2:
3494          REQUIRE_SSE2;
3495          /* fallthrough */
3496       case Iop_Sqrt32Fx4: {
3497          /* :: (rmode, vec) -> vec */
3498          HReg arg = iselVecExpr(env, e->Iex.Binop.arg2);
3499          HReg dst = newVRegV(env);
3500          /* XXXROUNDINGFIXME */
3501          /* set roundingmode here */
3502          addInstr(env, (e->Iex.Binop.op == Iop_Sqrt64Fx2
3503                            ? X86Instr_Sse64Fx2 : X86Instr_Sse32Fx4)
3504                        (Xsse_SQRTF, arg, dst));
3505          return dst;
3506       }
3507 
3508       case Iop_SetV128lo32: {
3509          HReg dst = newVRegV(env);
3510          HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3511          HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3512          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3513          sub_from_esp(env, 16);
3514          addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3515          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0));
3516          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3517          add_to_esp(env, 16);
3518          return dst;
3519       }
3520 
3521       case Iop_SetV128lo64: {
3522          HReg dst = newVRegV(env);
3523          HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3524          HReg srcIhi, srcIlo;
3525          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3526          X86AMode* esp4 = advance4(esp0);
3527          iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2);
3528          sub_from_esp(env, 16);
3529          addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3530          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0));
3531          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4));
3532          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3533          add_to_esp(env, 16);
3534          return dst;
3535       }
3536 
3537       case Iop_64HLtoV128: {
3538          HReg r3, r2, r1, r0;
3539          X86AMode* esp0  = X86AMode_IR(0, hregX86_ESP());
3540          X86AMode* esp4  = advance4(esp0);
3541          X86AMode* esp8  = advance4(esp4);
3542          X86AMode* esp12 = advance4(esp8);
3543          HReg dst = newVRegV(env);
3544 	 /* do this via the stack (easy, convenient, etc) */
3545          sub_from_esp(env, 16);
3546          /* Do the less significant 64 bits */
3547          iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
3548          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0));
3549          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4));
3550          /* Do the more significant 64 bits */
3551          iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
3552          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8));
3553          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12));
3554 	 /* Fetch result back from stack. */
3555          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3556          add_to_esp(env, 16);
3557          return dst;
3558       }
3559 
3560       case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
3561       case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
3562       case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
3563       case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4;
3564       case Iop_Max32Fx4:   op = Xsse_MAXF;   goto do_32Fx4;
3565       case Iop_Min32Fx4:   op = Xsse_MINF;   goto do_32Fx4;
3566       do_32Fx4:
3567       {
3568          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3569          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3570          HReg dst = newVRegV(env);
3571          addInstr(env, mk_vMOVsd_RR(argL, dst));
3572          addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
3573          return dst;
3574       }
3575 
3576       case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
3577       case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
3578       case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
3579       case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2;
3580       case Iop_Max64Fx2:   op = Xsse_MAXF;   goto do_64Fx2;
3581       case Iop_Min64Fx2:   op = Xsse_MINF;   goto do_64Fx2;
3582       do_64Fx2:
3583       {
3584          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3585          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3586          HReg dst = newVRegV(env);
3587          REQUIRE_SSE2;
3588          addInstr(env, mk_vMOVsd_RR(argL, dst));
3589          addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
3590          return dst;
3591       }
3592 
3593       case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
3594       case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4;
3595       case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
3596       case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4;
3597       case Iop_Add32F0x4:   op = Xsse_ADDF;   goto do_32F0x4;
3598       case Iop_Div32F0x4:   op = Xsse_DIVF;   goto do_32F0x4;
3599       case Iop_Max32F0x4:   op = Xsse_MAXF;   goto do_32F0x4;
3600       case Iop_Min32F0x4:   op = Xsse_MINF;   goto do_32F0x4;
3601       case Iop_Mul32F0x4:   op = Xsse_MULF;   goto do_32F0x4;
3602       case Iop_Sub32F0x4:   op = Xsse_SUBF;   goto do_32F0x4;
3603       do_32F0x4: {
3604          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3605          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3606          HReg dst = newVRegV(env);
3607          addInstr(env, mk_vMOVsd_RR(argL, dst));
3608          addInstr(env, X86Instr_Sse32FLo(op, argR, dst));
3609          return dst;
3610       }
3611 
3612       case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2;
3613       case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2;
3614       case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
3615       case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2;
3616       case Iop_Add64F0x2:   op = Xsse_ADDF;   goto do_64F0x2;
3617       case Iop_Div64F0x2:   op = Xsse_DIVF;   goto do_64F0x2;
3618       case Iop_Max64F0x2:   op = Xsse_MAXF;   goto do_64F0x2;
3619       case Iop_Min64F0x2:   op = Xsse_MINF;   goto do_64F0x2;
3620       case Iop_Mul64F0x2:   op = Xsse_MULF;   goto do_64F0x2;
3621       case Iop_Sub64F0x2:   op = Xsse_SUBF;   goto do_64F0x2;
3622       do_64F0x2: {
3623          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3624          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3625          HReg dst = newVRegV(env);
3626          REQUIRE_SSE2;
3627          addInstr(env, mk_vMOVsd_RR(argL, dst));
3628          addInstr(env, X86Instr_Sse64FLo(op, argR, dst));
3629          return dst;
3630       }
3631 
3632       case Iop_QNarrowBin32Sto16Sx8:
3633          op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
3634       case Iop_QNarrowBin16Sto8Sx16:
3635          op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
3636       case Iop_QNarrowBin16Sto8Ux16:
3637          op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
3638 
3639       case Iop_InterleaveHI8x16:
3640          op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
3641       case Iop_InterleaveHI16x8:
3642          op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
3643       case Iop_InterleaveHI32x4:
3644          op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
3645       case Iop_InterleaveHI64x2:
3646          op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
3647 
3648       case Iop_InterleaveLO8x16:
3649          op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
3650       case Iop_InterleaveLO16x8:
3651          op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
3652       case Iop_InterleaveLO32x4:
3653          op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
3654       case Iop_InterleaveLO64x2:
3655          op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
3656 
3657       case Iop_AndV128:    op = Xsse_AND;      goto do_SseReRg;
3658       case Iop_OrV128:     op = Xsse_OR;       goto do_SseReRg;
3659       case Iop_XorV128:    op = Xsse_XOR;      goto do_SseReRg;
3660       case Iop_Add8x16:    op = Xsse_ADD8;     goto do_SseReRg;
3661       case Iop_Add16x8:    op = Xsse_ADD16;    goto do_SseReRg;
3662       case Iop_Add32x4:    op = Xsse_ADD32;    goto do_SseReRg;
3663       case Iop_Add64x2:    op = Xsse_ADD64;    goto do_SseReRg;
3664       case Iop_QAdd8Sx16:  op = Xsse_QADD8S;   goto do_SseReRg;
3665       case Iop_QAdd16Sx8:  op = Xsse_QADD16S;  goto do_SseReRg;
3666       case Iop_QAdd8Ux16:  op = Xsse_QADD8U;   goto do_SseReRg;
3667       case Iop_QAdd16Ux8:  op = Xsse_QADD16U;  goto do_SseReRg;
3668       case Iop_Avg8Ux16:   op = Xsse_AVG8U;    goto do_SseReRg;
3669       case Iop_Avg16Ux8:   op = Xsse_AVG16U;   goto do_SseReRg;
3670       case Iop_CmpEQ8x16:  op = Xsse_CMPEQ8;   goto do_SseReRg;
3671       case Iop_CmpEQ16x8:  op = Xsse_CMPEQ16;  goto do_SseReRg;
3672       case Iop_CmpEQ32x4:  op = Xsse_CMPEQ32;  goto do_SseReRg;
3673       case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S;  goto do_SseReRg;
3674       case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
3675       case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
3676       case Iop_Max16Sx8:   op = Xsse_MAX16S;   goto do_SseReRg;
3677       case Iop_Max8Ux16:   op = Xsse_MAX8U;    goto do_SseReRg;
3678       case Iop_Min16Sx8:   op = Xsse_MIN16S;   goto do_SseReRg;
3679       case Iop_Min8Ux16:   op = Xsse_MIN8U;    goto do_SseReRg;
3680       case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
3681       case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
3682       case Iop_Mul16x8:    op = Xsse_MUL16;    goto do_SseReRg;
3683       case Iop_Sub8x16:    op = Xsse_SUB8;     goto do_SseReRg;
3684       case Iop_Sub16x8:    op = Xsse_SUB16;    goto do_SseReRg;
3685       case Iop_Sub32x4:    op = Xsse_SUB32;    goto do_SseReRg;
3686       case Iop_Sub64x2:    op = Xsse_SUB64;    goto do_SseReRg;
3687       case Iop_QSub8Sx16:  op = Xsse_QSUB8S;   goto do_SseReRg;
3688       case Iop_QSub16Sx8:  op = Xsse_QSUB16S;  goto do_SseReRg;
3689       case Iop_QSub8Ux16:  op = Xsse_QSUB8U;   goto do_SseReRg;
3690       case Iop_QSub16Ux8:  op = Xsse_QSUB16U;  goto do_SseReRg;
3691       do_SseReRg: {
3692          HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
3693          HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
3694          HReg dst = newVRegV(env);
3695          if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR)
3696             REQUIRE_SSE2;
3697          if (arg1isEReg) {
3698             addInstr(env, mk_vMOVsd_RR(arg2, dst));
3699             addInstr(env, X86Instr_SseReRg(op, arg1, dst));
3700          } else {
3701             addInstr(env, mk_vMOVsd_RR(arg1, dst));
3702             addInstr(env, X86Instr_SseReRg(op, arg2, dst));
3703          }
3704          return dst;
3705       }
3706 
3707       case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
3708       case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
3709       case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
3710       case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
3711       case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
3712       case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
3713       case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
3714       case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
3715       do_SseShift: {
3716          HReg      greg = iselVecExpr(env, e->Iex.Binop.arg1);
3717          X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3718          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3719          HReg      ereg = newVRegV(env);
3720          HReg      dst  = newVRegV(env);
3721          REQUIRE_SSE2;
3722          addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3723          addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3724          addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3725          addInstr(env, X86Instr_Push(rmi));
3726          addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
3727 	 addInstr(env, mk_vMOVsd_RR(greg, dst));
3728          addInstr(env, X86Instr_SseReRg(op, ereg, dst));
3729          add_to_esp(env, 16);
3730          return dst;
3731       }
3732 
3733       case Iop_NarrowBin32to16x8:
3734          fn = (HWord)h_generic_calc_NarrowBin32to16x8;
3735          goto do_SseAssistedBinary;
3736       case Iop_NarrowBin16to8x16:
3737          fn = (HWord)h_generic_calc_NarrowBin16to8x16;
3738          goto do_SseAssistedBinary;
3739       do_SseAssistedBinary: {
3740          /* As with the amd64 case (where this is copied from) we
3741             generate pretty bad code. */
3742          vassert(fn != 0);
3743          HReg dst = newVRegV(env);
3744          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3745          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3746          HReg argp = newVRegI(env);
3747          /* subl $112, %esp         -- make a space */
3748          sub_from_esp(env, 112);
3749          /* leal 48(%esp), %r_argp  -- point into it */
3750          addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
3751                                       argp));
3752          /* andl $-16, %r_argp      -- 16-align the pointer */
3753          addInstr(env, X86Instr_Alu32R(Xalu_AND,
3754                                        X86RMI_Imm( ~(UInt)15 ),
3755                                        argp));
3756          /* Prepare 3 arg regs:
3757             leal  0(%r_argp), %eax
3758             leal 16(%r_argp), %edx
3759             leal 32(%r_argp), %ecx
3760          */
3761          addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
3762                                       hregX86_EAX()));
3763          addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
3764                                       hregX86_EDX()));
3765          addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
3766                                       hregX86_ECX()));
3767          /* Store the two args, at (%edx) and (%ecx):
3768             movupd  %argL, 0(%edx)
3769             movupd  %argR, 0(%ecx)
3770          */
3771          addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
3772                                         X86AMode_IR(0, hregX86_EDX())));
3773          addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
3774                                         X86AMode_IR(0, hregX86_ECX())));
3775          /* call the helper */
3776          addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
3777                                       3, mk_RetLoc_simple(RLPri_None) ));
3778          /* fetch the result from memory, using %r_argp, which the
3779             register allocator will keep alive across the call. */
3780          addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
3781                                         X86AMode_IR(0, argp)));
3782          /* and finally, clear the space */
3783          add_to_esp(env, 112);
3784          return dst;
3785       }
3786 
3787       default:
3788          break;
3789    } /* switch (e->Iex.Binop.op) */
3790    } /* if (e->tag == Iex_Binop) */
3791 
3792 
3793    if (e->tag == Iex_Triop) {
3794    IRTriop *triop = e->Iex.Triop.details;
3795    switch (triop->op) {
3796 
3797       case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4_w_rm;
3798       case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4_w_rm;
3799       case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4_w_rm;
3800       case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4_w_rm;
3801       do_32Fx4_w_rm:
3802       {
3803          HReg argL = iselVecExpr(env, triop->arg2);
3804          HReg argR = iselVecExpr(env, triop->arg3);
3805          HReg dst = newVRegV(env);
3806          addInstr(env, mk_vMOVsd_RR(argL, dst));
3807          /* XXXROUNDINGFIXME */
3808          /* set roundingmode here */
3809          addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
3810          return dst;
3811       }
3812 
3813       case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2_w_rm;
3814       case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2_w_rm;
3815       case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2_w_rm;
3816       case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2_w_rm;
3817       do_64Fx2_w_rm:
3818       {
3819          HReg argL = iselVecExpr(env, triop->arg2);
3820          HReg argR = iselVecExpr(env, triop->arg3);
3821          HReg dst = newVRegV(env);
3822          REQUIRE_SSE2;
3823          addInstr(env, mk_vMOVsd_RR(argL, dst));
3824          /* XXXROUNDINGFIXME */
3825          /* set roundingmode here */
3826          addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
3827          return dst;
3828       }
3829 
3830       default:
3831          break;
3832    } /* switch (triop->op) */
3833    } /* if (e->tag == Iex_Triop) */
3834 
3835 
3836    if (e->tag == Iex_ITE) { // VFD
3837       HReg r1  = iselVecExpr(env, e->Iex.ITE.iftrue);
3838       HReg r0  = iselVecExpr(env, e->Iex.ITE.iffalse);
3839       HReg dst = newVRegV(env);
3840       addInstr(env, mk_vMOVsd_RR(r1,dst));
3841       X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
3842       addInstr(env, X86Instr_SseCMov(cc ^ 1, r0, dst));
3843       return dst;
3844    }
3845 
3846    vec_fail:
3847    vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
3848               LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
3849    ppIRExpr(e);
3850    vpanic("iselVecExpr_wrk");
3851 
3852 #  undef REQUIRE_SSE1
3853 #  undef REQUIRE_SSE2
3854 #  undef SSE2_OR_ABOVE
3855 }
3856 
3857 
3858 /*---------------------------------------------------------*/
3859 /*--- ISEL: Statements                                  ---*/
3860 /*---------------------------------------------------------*/
3861 
iselStmt(ISelEnv * env,IRStmt * stmt)3862 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3863 {
3864    if (vex_traceflags & VEX_TRACE_VCODE) {
3865       vex_printf("\n-- ");
3866       ppIRStmt(stmt);
3867       vex_printf("\n");
3868    }
3869 
3870    switch (stmt->tag) {
3871 
3872    /* --------- STORE --------- */
3873    case Ist_Store: {
3874       IRType    tya   = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3875       IRType    tyd   = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3876       IREndness end   = stmt->Ist.Store.end;
3877 
3878       if (tya != Ity_I32 || end != Iend_LE)
3879          goto stmt_fail;
3880 
3881       if (tyd == Ity_I32) {
3882          X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3883          X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
3884          addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
3885          return;
3886       }
3887       if (tyd == Ity_I8 || tyd == Ity_I16) {
3888          X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3889          HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
3890          addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
3891                                        r,am ));
3892          return;
3893       }
3894       if (tyd == Ity_F64) {
3895          X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3896          HReg r = iselDblExpr(env, stmt->Ist.Store.data);
3897          addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
3898          return;
3899       }
3900       if (tyd == Ity_F32) {
3901          X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3902          HReg r = iselFltExpr(env, stmt->Ist.Store.data);
3903          addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
3904          return;
3905       }
3906       if (tyd == Ity_I64) {
3907          HReg vHi, vLo, rA;
3908          iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
3909          rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
3910          addInstr(env, X86Instr_Alu32M(
3911                           Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
3912          addInstr(env, X86Instr_Alu32M(
3913                           Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
3914          return;
3915       }
3916       if (tyd == Ity_V128) {
3917          X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3918          HReg r = iselVecExpr(env, stmt->Ist.Store.data);
3919          addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
3920          return;
3921       }
3922       break;
3923    }
3924 
3925    /* --------- PUT --------- */
3926    case Ist_Put: {
3927       IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3928       if (ty == Ity_I32) {
3929          /* We're going to write to memory, so compute the RHS into an
3930             X86RI. */
3931          X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
3932          addInstr(env,
3933                   X86Instr_Alu32M(
3934                      Xalu_MOV,
3935                      ri,
3936                      X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
3937                  ));
3938          return;
3939       }
3940       if (ty == Ity_I8 || ty == Ity_I16) {
3941          HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
3942          addInstr(env, X86Instr_Store(
3943                           toUChar(ty==Ity_I8 ? 1 : 2),
3944                           r,
3945                           X86AMode_IR(stmt->Ist.Put.offset,
3946                                       hregX86_EBP())));
3947          return;
3948       }
3949       if (ty == Ity_I64) {
3950          HReg vHi, vLo;
3951          X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3952          X86AMode* am4 = advance4(am);
3953          iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
3954          addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
3955          addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
3956          return;
3957       }
3958       if (ty == Ity_V128) {
3959          HReg      vec = iselVecExpr(env, stmt->Ist.Put.data);
3960          X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3961          addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
3962          return;
3963       }
3964       if (ty == Ity_F32) {
3965          HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
3966          X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3967          set_FPU_rounding_default(env); /* paranoia */
3968          addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
3969          return;
3970       }
3971       if (ty == Ity_F64) {
3972          HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
3973          X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3974          set_FPU_rounding_default(env); /* paranoia */
3975          addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
3976          return;
3977       }
3978       break;
3979    }
3980 
3981    /* --------- Indexed PUT --------- */
3982    case Ist_PutI: {
3983       IRPutI *puti = stmt->Ist.PutI.details;
3984 
3985       X86AMode* am
3986          = genGuestArrayOffset(
3987               env, puti->descr,
3988                    puti->ix, puti->bias );
3989 
3990       IRType ty = typeOfIRExpr(env->type_env, puti->data);
3991       if (ty == Ity_F64) {
3992          HReg val = iselDblExpr(env, puti->data);
3993          addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
3994          return;
3995       }
3996       if (ty == Ity_I8) {
3997          HReg r = iselIntExpr_R(env, puti->data);
3998          addInstr(env, X86Instr_Store( 1, r, am ));
3999          return;
4000       }
4001       if (ty == Ity_I32) {
4002          HReg r = iselIntExpr_R(env, puti->data);
4003          addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
4004          return;
4005       }
4006       if (ty == Ity_I64) {
4007          HReg rHi, rLo;
4008          X86AMode* am4 = advance4(am);
4009          iselInt64Expr(&rHi, &rLo, env, puti->data);
4010          addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
4011          addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
4012          return;
4013       }
4014       break;
4015    }
4016 
4017    /* --------- TMP --------- */
4018    case Ist_WrTmp: {
4019       IRTemp tmp = stmt->Ist.WrTmp.tmp;
4020       IRType ty = typeOfIRTemp(env->type_env, tmp);
4021 
4022       /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
4023          compute it into an AMode and then use LEA.  This usually
4024          produces fewer instructions, often because (for memcheck
4025          created IR) we get t = address-expression, (t is later used
4026          twice) and so doing this naturally turns address-expression
4027          back into an X86 amode. */
4028       if (ty == Ity_I32
4029           && stmt->Ist.WrTmp.data->tag == Iex_Binop
4030           && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
4031          X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
4032          HReg dst = lookupIRTemp(env, tmp);
4033          if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
4034             /* Hmm, iselIntExpr_AMode wimped out and just computed the
4035                value into a register.  Just emit a normal reg-reg move
4036                so reg-alloc can coalesce it away in the usual way. */
4037             HReg src = am->Xam.IR.reg;
4038             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
4039          } else {
4040             addInstr(env, X86Instr_Lea32(am,dst));
4041          }
4042          return;
4043       }
4044 
4045       if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
4046          X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
4047          HReg dst = lookupIRTemp(env, tmp);
4048          addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
4049          return;
4050       }
4051       if (ty == Ity_I64) {
4052          HReg rHi, rLo, dstHi, dstLo;
4053          iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
4054          lookupIRTemp64( &dstHi, &dstLo, env, tmp);
4055          addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
4056          addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
4057          return;
4058       }
4059       if (ty == Ity_I1) {
4060          X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
4061          HReg dst = lookupIRTemp(env, tmp);
4062          addInstr(env, X86Instr_Set32(cond, dst));
4063          return;
4064       }
4065       if (ty == Ity_F64) {
4066          HReg dst = lookupIRTemp(env, tmp);
4067          HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
4068          addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
4069          return;
4070       }
4071       if (ty == Ity_F32) {
4072          HReg dst = lookupIRTemp(env, tmp);
4073          HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
4074          addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
4075          return;
4076       }
4077       if (ty == Ity_V128) {
4078          HReg dst = lookupIRTemp(env, tmp);
4079          HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
4080          addInstr(env, mk_vMOVsd_RR(src,dst));
4081          return;
4082       }
4083       break;
4084    }
4085 
4086    /* --------- Call to DIRTY helper --------- */
4087    case Ist_Dirty: {
4088       IRDirty* d = stmt->Ist.Dirty.details;
4089 
4090       /* Figure out the return type, if any. */
4091       IRType retty = Ity_INVALID;
4092       if (d->tmp != IRTemp_INVALID)
4093          retty = typeOfIRTemp(env->type_env, d->tmp);
4094 
4095       Bool retty_ok = False;
4096       switch (retty) {
4097          case Ity_INVALID: /* function doesn't return anything */
4098          case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
4099          case Ity_V128:
4100             retty_ok = True; break;
4101          default:
4102             break;
4103       }
4104       if (!retty_ok)
4105          break; /* will go to stmt_fail: */
4106 
4107       /* Marshal args, do the call, and set the return value to
4108          0x555..555 if this is a conditional call that returns a value
4109          and the call is skipped. */
4110       UInt   addToSp = 0;
4111       RetLoc rloc    = mk_RetLoc_INVALID();
4112       doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
4113       vassert(is_sane_RetLoc(rloc));
4114 
4115       /* Now figure out what to do with the returned value, if any. */
4116       switch (retty) {
4117          case Ity_INVALID: {
4118             /* No return value.  Nothing to do. */
4119             vassert(d->tmp == IRTemp_INVALID);
4120             vassert(rloc.pri == RLPri_None);
4121             vassert(addToSp == 0);
4122             return;
4123          }
4124          case Ity_I32: case Ity_I16: case Ity_I8: {
4125             /* The returned value is in %eax.  Park it in the register
4126                associated with tmp. */
4127             vassert(rloc.pri == RLPri_Int);
4128             vassert(addToSp == 0);
4129             HReg dst = lookupIRTemp(env, d->tmp);
4130             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
4131             return;
4132          }
4133          case Ity_I64: {
4134             /* The returned value is in %edx:%eax.  Park it in the
4135                register-pair associated with tmp. */
4136             vassert(rloc.pri == RLPri_2Int);
4137             vassert(addToSp == 0);
4138             HReg dstHi, dstLo;
4139             lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
4140             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
4141             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
4142             return;
4143          }
4144          case Ity_V128: {
4145             /* The returned value is on the stack, and *retloc tells
4146                us where.  Fish it off the stack and then move the
4147                stack pointer upwards to clear it, as directed by
4148                doHelperCall. */
4149             vassert(rloc.pri == RLPri_V128SpRel);
4150             vassert(addToSp >= 16);
4151             HReg      dst = lookupIRTemp(env, d->tmp);
4152             X86AMode* am  = X86AMode_IR(rloc.spOff, hregX86_ESP());
4153             addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
4154             add_to_esp(env, addToSp);
4155             return;
4156          }
4157          default:
4158             /*NOTREACHED*/
4159             vassert(0);
4160       }
4161       break;
4162    }
4163 
4164    /* --------- MEM FENCE --------- */
4165    case Ist_MBE:
4166       switch (stmt->Ist.MBE.event) {
4167          case Imbe_Fence:
4168             addInstr(env, X86Instr_MFence(env->hwcaps));
4169             return;
4170          default:
4171             break;
4172       }
4173       break;
4174 
4175    /* --------- ACAS --------- */
4176    case Ist_CAS:
4177       if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
4178          /* "normal" singleton CAS */
4179          UChar  sz;
4180          IRCAS* cas = stmt->Ist.CAS.details;
4181          IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
4182          /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
4183          X86AMode* am = iselIntExpr_AMode(env, cas->addr);
4184          HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
4185          HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
4186          HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
4187          vassert(cas->expdHi == NULL);
4188          vassert(cas->dataHi == NULL);
4189          addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
4190          addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
4191          addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
4192          switch (ty) {
4193             case Ity_I32: sz = 4; break;
4194             case Ity_I16: sz = 2; break;
4195             case Ity_I8:  sz = 1; break;
4196             default: goto unhandled_cas;
4197          }
4198          addInstr(env, X86Instr_ACAS(am, sz));
4199          addInstr(env,
4200                   X86Instr_CMov32(Xcc_NZ,
4201                                   X86RM_Reg(hregX86_EAX()), rOldLo));
4202          return;
4203       } else {
4204          /* double CAS */
4205          IRCAS* cas = stmt->Ist.CAS.details;
4206          IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
4207          /* only 32-bit allowed in this case */
4208          /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
4209          /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
4210          X86AMode* am = iselIntExpr_AMode(env, cas->addr);
4211          HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
4212          HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
4213          HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
4214          HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
4215          HReg rOldHi  = lookupIRTemp(env, cas->oldHi);
4216          HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
4217          if (ty != Ity_I32)
4218             goto unhandled_cas;
4219          addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
4220          addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
4221          addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
4222          addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
4223          addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
4224          addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
4225          addInstr(env, X86Instr_DACAS(am));
4226          addInstr(env,
4227                   X86Instr_CMov32(Xcc_NZ,
4228                                   X86RM_Reg(hregX86_EDX()), rOldHi));
4229          addInstr(env,
4230                   X86Instr_CMov32(Xcc_NZ,
4231                                   X86RM_Reg(hregX86_EAX()), rOldLo));
4232          return;
4233       }
4234       unhandled_cas:
4235       break;
4236 
4237    /* --------- INSTR MARK --------- */
4238    /* Doesn't generate any executable code ... */
4239    case Ist_IMark:
4240        return;
4241 
4242    /* --------- NO-OP --------- */
4243    /* Fairly self-explanatory, wouldn't you say? */
4244    case Ist_NoOp:
4245        return;
4246 
4247    /* --------- EXIT --------- */
4248    case Ist_Exit: {
4249       if (stmt->Ist.Exit.dst->tag != Ico_U32)
4250          vpanic("iselStmt(x86): Ist_Exit: dst is not a 32-bit value");
4251 
4252       X86CondCode cc    = iselCondCode(env, stmt->Ist.Exit.guard);
4253       X86AMode*   amEIP = X86AMode_IR(stmt->Ist.Exit.offsIP,
4254                                       hregX86_EBP());
4255 
4256       /* Case: boring transfer to known address */
4257       if (stmt->Ist.Exit.jk == Ijk_Boring) {
4258          if (env->chainingAllowed) {
4259             /* .. almost always true .. */
4260             /* Skip the event check at the dst if this is a forwards
4261                edge. */
4262             Bool toFastEP
4263                = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
4264             if (0) vex_printf("%s", toFastEP ? "Y" : ",");
4265             addInstr(env, X86Instr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
4266                                            amEIP, cc, toFastEP));
4267          } else {
4268             /* .. very occasionally .. */
4269             /* We can't use chaining, so ask for an assisted transfer,
4270                as that's the only alternative that is allowable. */
4271             HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4272             addInstr(env, X86Instr_XAssisted(r, amEIP, cc, Ijk_Boring));
4273          }
4274          return;
4275       }
4276 
4277       /* Case: assisted transfer to arbitrary address */
4278       switch (stmt->Ist.Exit.jk) {
4279          /* Keep this list in sync with that in iselNext below */
4280          case Ijk_ClientReq:
4281          case Ijk_EmWarn:
4282          case Ijk_MapFail:
4283          case Ijk_NoDecode:
4284          case Ijk_NoRedir:
4285          case Ijk_SigSEGV:
4286          case Ijk_SigTRAP:
4287          case Ijk_Sys_int128:
4288          case Ijk_Sys_int129:
4289          case Ijk_Sys_int130:
4290          case Ijk_Sys_int145:
4291          case Ijk_Sys_int210:
4292          case Ijk_Sys_syscall:
4293          case Ijk_Sys_sysenter:
4294          case Ijk_InvalICache:
4295          case Ijk_Yield:
4296          {
4297             HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4298             addInstr(env, X86Instr_XAssisted(r, amEIP, cc, stmt->Ist.Exit.jk));
4299             return;
4300          }
4301          default:
4302             break;
4303       }
4304 
4305       /* Do we ever expect to see any other kind? */
4306       goto stmt_fail;
4307    }
4308 
4309    default: break;
4310    }
4311   stmt_fail:
4312    ppIRStmt(stmt);
4313    vpanic("iselStmt");
4314 }
4315 
4316 
4317 /*---------------------------------------------------------*/
4318 /*--- ISEL: Basic block terminators (Nexts)             ---*/
4319 /*---------------------------------------------------------*/
4320 
iselNext(ISelEnv * env,IRExpr * next,IRJumpKind jk,Int offsIP)4321 static void iselNext ( ISelEnv* env,
4322                        IRExpr* next, IRJumpKind jk, Int offsIP )
4323 {
4324    if (vex_traceflags & VEX_TRACE_VCODE) {
4325       vex_printf( "\n-- PUT(%d) = ", offsIP);
4326       ppIRExpr( next );
4327       vex_printf( "; exit-");
4328       ppIRJumpKind(jk);
4329       vex_printf( "\n");
4330    }
4331 
4332    /* Case: boring transfer to known address */
4333    if (next->tag == Iex_Const) {
4334       IRConst* cdst = next->Iex.Const.con;
4335       vassert(cdst->tag == Ico_U32);
4336       if (jk == Ijk_Boring || jk == Ijk_Call) {
4337          /* Boring transfer to known address */
4338          X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4339          if (env->chainingAllowed) {
4340             /* .. almost always true .. */
4341             /* Skip the event check at the dst if this is a forwards
4342                edge. */
4343             Bool toFastEP
4344                = ((Addr32)cdst->Ico.U32) > env->max_ga;
4345             if (0) vex_printf("%s", toFastEP ? "X" : ".");
4346             addInstr(env, X86Instr_XDirect(cdst->Ico.U32,
4347                                            amEIP, Xcc_ALWAYS,
4348                                            toFastEP));
4349          } else {
4350             /* .. very occasionally .. */
4351             /* We can't use chaining, so ask for an assisted transfer,
4352                as that's the only alternative that is allowable. */
4353             HReg r = iselIntExpr_R(env, next);
4354             addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
4355                                              Ijk_Boring));
4356          }
4357          return;
4358       }
4359    }
4360 
4361    /* Case: call/return (==boring) transfer to any address */
4362    switch (jk) {
4363       case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
4364          HReg      r     = iselIntExpr_R(env, next);
4365          X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4366          if (env->chainingAllowed) {
4367             addInstr(env, X86Instr_XIndir(r, amEIP, Xcc_ALWAYS));
4368          } else {
4369             addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
4370                                                Ijk_Boring));
4371          }
4372          return;
4373       }
4374       default:
4375          break;
4376    }
4377 
4378    /* Case: assisted transfer to arbitrary address */
4379    switch (jk) {
4380       /* Keep this list in sync with that for Ist_Exit above */
4381       case Ijk_ClientReq:
4382       case Ijk_EmWarn:
4383       case Ijk_MapFail:
4384       case Ijk_NoDecode:
4385       case Ijk_NoRedir:
4386       case Ijk_SigSEGV:
4387       case Ijk_SigTRAP:
4388       case Ijk_Sys_int128:
4389       case Ijk_Sys_int129:
4390       case Ijk_Sys_int130:
4391       case Ijk_Sys_int145:
4392       case Ijk_Sys_int210:
4393       case Ijk_Sys_syscall:
4394       case Ijk_Sys_sysenter:
4395       case Ijk_InvalICache:
4396       case Ijk_Yield:
4397       {
4398          HReg      r     = iselIntExpr_R(env, next);
4399          X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4400          addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, jk));
4401          return;
4402       }
4403       default:
4404          break;
4405    }
4406 
4407    vex_printf( "\n-- PUT(%d) = ", offsIP);
4408    ppIRExpr( next );
4409    vex_printf( "; exit-");
4410    ppIRJumpKind(jk);
4411    vex_printf( "\n");
4412    vassert(0); // are we expecting any other kind?
4413 }
4414 
4415 
4416 /*---------------------------------------------------------*/
4417 /*--- Insn selector top-level                           ---*/
4418 /*---------------------------------------------------------*/
4419 
4420 /* Translate an entire SB to x86 code. */
4421 
iselSB_X86(const IRSB * bb,VexArch arch_host,const VexArchInfo * archinfo_host,const VexAbiInfo * vbi,Int offs_Host_EvC_Counter,Int offs_Host_EvC_FailAddr,Bool chainingAllowed,Bool addProfInc,Addr max_ga)4422 HInstrArray* iselSB_X86 ( const IRSB* bb,
4423                           VexArch      arch_host,
4424                           const VexArchInfo* archinfo_host,
4425                           const VexAbiInfo*  vbi/*UNUSED*/,
4426                           Int offs_Host_EvC_Counter,
4427                           Int offs_Host_EvC_FailAddr,
4428                           Bool chainingAllowed,
4429                           Bool addProfInc,
4430                           Addr max_ga )
4431 {
4432    Int      i, j;
4433    HReg     hreg, hregHI;
4434    ISelEnv* env;
4435    UInt     hwcaps_host = archinfo_host->hwcaps;
4436    X86AMode *amCounter, *amFailAddr;
4437 
4438    /* sanity ... */
4439    vassert(arch_host == VexArchX86);
4440    vassert(0 == (hwcaps_host
4441                  & ~(VEX_HWCAPS_X86_MMXEXT
4442                      | VEX_HWCAPS_X86_SSE1
4443                      | VEX_HWCAPS_X86_SSE2
4444                      | VEX_HWCAPS_X86_SSE3
4445                      | VEX_HWCAPS_X86_LZCNT)));
4446 
4447    /* Check that the host's endianness is as expected. */
4448    vassert(archinfo_host->endness == VexEndnessLE);
4449 
4450    /* Make up an initial environment to use. */
4451    env = LibVEX_Alloc_inline(sizeof(ISelEnv));
4452    env->vreg_ctr = 0;
4453 
4454    /* Set up output code array. */
4455    env->code = newHInstrArray();
4456 
4457    /* Copy BB's type env. */
4458    env->type_env = bb->tyenv;
4459 
4460    /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
4461       change as we go along. */
4462    env->n_vregmap = bb->tyenv->types_used;
4463    env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4464    env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4465 
4466    /* and finally ... */
4467    env->chainingAllowed = chainingAllowed;
4468    env->hwcaps          = hwcaps_host;
4469    env->max_ga          = max_ga;
4470 
4471    /* For each IR temporary, allocate a suitably-kinded virtual
4472       register. */
4473    j = 0;
4474    for (i = 0; i < env->n_vregmap; i++) {
4475       hregHI = hreg = INVALID_HREG;
4476       switch (bb->tyenv->types[i]) {
4477          case Ity_I1:
4478          case Ity_I8:
4479          case Ity_I16:
4480          case Ity_I32:  hreg   = mkHReg(True, HRcInt32,  0, j++); break;
4481          case Ity_I64:  hreg   = mkHReg(True, HRcInt32,  0, j++);
4482                         hregHI = mkHReg(True, HRcInt32,  0, j++); break;
4483          case Ity_F32:
4484          case Ity_F64:  hreg   = mkHReg(True, HRcFlt64,  0, j++); break;
4485          case Ity_V128: hreg   = mkHReg(True, HRcVec128, 0, j++); break;
4486          default: ppIRType(bb->tyenv->types[i]);
4487                   vpanic("iselBB: IRTemp type");
4488       }
4489       env->vregmap[i]   = hreg;
4490       env->vregmapHI[i] = hregHI;
4491    }
4492    env->vreg_ctr = j;
4493 
4494    /* The very first instruction must be an event check. */
4495    amCounter  = X86AMode_IR(offs_Host_EvC_Counter,  hregX86_EBP());
4496    amFailAddr = X86AMode_IR(offs_Host_EvC_FailAddr, hregX86_EBP());
4497    addInstr(env, X86Instr_EvCheck(amCounter, amFailAddr));
4498 
4499    /* Possibly a block counter increment (for profiling).  At this
4500       point we don't know the address of the counter, so just pretend
4501       it is zero.  It will have to be patched later, but before this
4502       translation is used, by a call to LibVEX_patchProfCtr. */
4503    if (addProfInc) {
4504       addInstr(env, X86Instr_ProfInc());
4505    }
4506 
4507    /* Ok, finally we can iterate over the statements. */
4508    for (i = 0; i < bb->stmts_used; i++)
4509       iselStmt(env, bb->stmts[i]);
4510 
4511    iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
4512 
4513    /* record the number of vregs we used. */
4514    env->code->n_vregs = env->vreg_ctr;
4515    return env->code;
4516 }
4517 
4518 
4519 /*---------------------------------------------------------------*/
4520 /*--- end                                     host_x86_isel.c ---*/
4521 /*---------------------------------------------------------------*/
4522