1 
2 /*---------------------------------------------------------------*/
3 /*--- begin                                   host_arm_isel.c ---*/
4 /*---------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2017 OpenWorks LLP
11       info@open-works.net
12 
13    NEON support is
14    Copyright (C) 2010-2017 Samsung Electronics
15    contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16               and Kirill Batuzov <batuzovk@ispras.ru>
17 
18    This program is free software; you can redistribute it and/or
19    modify it under the terms of the GNU General Public License as
20    published by the Free Software Foundation; either version 2 of the
21    License, or (at your option) any later version.
22 
23    This program is distributed in the hope that it will be useful, but
24    WITHOUT ANY WARRANTY; without even the implied warranty of
25    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26    General Public License for more details.
27 
28    You should have received a copy of the GNU General Public License
29    along with this program; if not, write to the Free Software
30    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31    02110-1301, USA.
32 
33    The GNU General Public License is contained in the file COPYING.
34 */
35 
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
38 #include "libvex.h"
39 #include "ir_match.h"
40 
41 #include "main_util.h"
42 #include "main_globals.h"
43 #include "host_generic_regs.h"
44 #include "host_generic_simd64.h"  // for 32-bit SIMD helpers
45 #include "host_arm_defs.h"
46 
47 
48 /*---------------------------------------------------------*/
49 /*--- ARMvfp control word stuff                         ---*/
50 /*---------------------------------------------------------*/
51 
/* Code produced by Vex assumes the FPU is configured as follows:
   every exception masked, round-to-nearest selected, non-vector
   mode, the NZCV flags clear, and FZ (flush to zero) disabled.  As
   it happens, that configuration is an FPSCR value of zero.

   FPSCR must therefore be zero (or at least its bottom 28 bits must
   be) when Vex-generated code is entered, and must be unchanged when
   that code exits.
*/

#define DEFAULT_FPSCR 0
63 
64 
65 /*---------------------------------------------------------*/
66 /*--- ISelEnv                                           ---*/
67 /*---------------------------------------------------------*/
68 
69 /* This carries around:
70 
71    - A mapping from IRTemp to IRType, giving the type of any IRTemp we
72      might encounter.  This is computed before insn selection starts,
73      and does not change.
74 
75    - A mapping from IRTemp to HReg.  This tells the insn selector
76      which virtual register(s) are associated with each IRTemp
77      temporary.  This is computed before insn selection starts, and
78      does not change.  We expect this mapping to map precisely the
79      same set of IRTemps as the type mapping does.
80 
81         - vregmap   holds the primary register for the IRTemp.
82         - vregmapHI is only used for 64-bit integer-typed
83              IRTemps.  It holds the identity of a second
84              32-bit virtual HReg, which holds the high half
85              of the value.
86 
87    - The code array, that is, the insns selected so far.
88 
89    - A counter, for generating new virtual registers.
90 
91    - The host hardware capabilities word.  This is set at the start
92      and does not change.
93 
94    - A Bool for indicating whether we may generate chain-me
95      instructions for control flow transfers, or whether we must use
96      XAssisted.
97 
98    - The maximum guest address of any guest insn in this block.
99      Actually, the address of the highest-addressed byte from any insn
100      in this block.  Is set at the start and does not change.  This is
101      used for detecting jumps which are definitely forward-edges from
102      this block, and therefore can be made (chained) to the fast entry
103      point of the destination, thereby avoiding the destination's
104      event check.
105 
106    Note, this is all (well, mostly) host-independent.
107 */
108 
109 typedef
110    struct {
111       /* Constant -- are set at the start and do not change. */
112       IRTypeEnv*   type_env;
113 
114       HReg*        vregmap;
115       HReg*        vregmapHI;
116       Int          n_vregmap;
117 
118       UInt         hwcaps;
119 
120       Bool         chainingAllowed;
121       Addr32       max_ga;
122 
123       /* These are modified as we go along. */
124       HInstrArray* code;
125       Int          vreg_ctr;
126    }
127    ISelEnv;
128 
lookupIRTemp(ISelEnv * env,IRTemp tmp)129 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
130 {
131    vassert(tmp >= 0);
132    vassert(tmp < env->n_vregmap);
133    return env->vregmap[tmp];
134 }
135 
/* Fetch the register pair recorded for IRTemp |tmp|: the primary
   register is written to |*vrLO| and the vregmapHI entry to |*vrHI|.
   Asserts that |tmp| is in range and that a valid high-half register
   has been recorded for it. */
static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
{
   HReg* loMap = env->vregmap;
   HReg* hiMap = env->vregmapHI;

   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));

   *vrLO = loMap[tmp];
   *vrHI = hiMap[tmp];
}
144 
/* Append |instr| to the code selected so far.  When VEX_TRACE_VCODE
   tracing is enabled, also print the instruction followed by a
   newline. */
static void addInstr ( ISelEnv* env, ARMInstr* instr )
{
   addHInstr(env->code, instr);

   if (0 == (vex_traceflags & VEX_TRACE_VCODE))
      return;

   ppARMInstr(instr);
   vex_printf("\n");
}
153 
newVRegI(ISelEnv * env)154 static HReg newVRegI ( ISelEnv* env )
155 {
156    HReg reg = mkHReg(True/*virtual reg*/, HRcInt32, 0/*enc*/, env->vreg_ctr);
157    env->vreg_ctr++;
158    return reg;
159 }
160 
newVRegD(ISelEnv * env)161 static HReg newVRegD ( ISelEnv* env )
162 {
163    HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
164    env->vreg_ctr++;
165    return reg;
166 }
167 
newVRegF(ISelEnv * env)168 static HReg newVRegF ( ISelEnv* env )
169 {
170    HReg reg = mkHReg(True/*virtual reg*/, HRcFlt32, 0/*enc*/, env->vreg_ctr);
171    env->vreg_ctr++;
172    return reg;
173 }
174 
newVRegV(ISelEnv * env)175 static HReg newVRegV ( ISelEnv* env )
176 {
177    HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
178    env->vreg_ctr++;
179    return reg;
180 }
181 
182 /* These are duplicated in guest_arm_toIR.c */
/* Shorthand: build a unary-operation IR expression |op|(|a|). */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   IRExpr* node = IRExpr_Unop(op, a);
   return node;
}
187 
/* Shorthand: build a binary-operation IR expression |op|(|a1|, |a2|). */
static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   IRExpr* node = IRExpr_Binop(op, a1, a2);
   return node;
}
192 
bind(Int binder)193 static IRExpr* bind ( Int binder )
194 {
195    return IRExpr_Binder(binder);
196 }
197 
198 
199 /*---------------------------------------------------------*/
200 /*--- ISEL: Forward declarations                        ---*/
201 /*---------------------------------------------------------*/
202 
203 /* These are organised as iselXXX and iselXXX_wrk pairs.  The
204    iselXXX_wrk do the real work, but are not to be called directly.
205    For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
206    checks that all returned registers are virtual.  You should not
207    call the _wrk version directly.
208 */
209 static ARMAMode1*  iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
210 static ARMAMode1*  iselIntExpr_AMode1     ( ISelEnv* env, IRExpr* e );
211 
212 static ARMAMode2*  iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
213 static ARMAMode2*  iselIntExpr_AMode2     ( ISelEnv* env, IRExpr* e );
214 
215 static ARMAModeV*  iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
216 static ARMAModeV*  iselIntExpr_AModeV     ( ISelEnv* env, IRExpr* e );
217 
218 static ARMAModeN*  iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
219 static ARMAModeN*  iselIntExpr_AModeN     ( ISelEnv* env, IRExpr* e );
220 
221 static ARMRI84*    iselIntExpr_RI84_wrk
222         ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
223 static ARMRI84*    iselIntExpr_RI84
224         ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
225 
226 static ARMRI5*     iselIntExpr_RI5_wrk    ( ISelEnv* env, IRExpr* e );
227 static ARMRI5*     iselIntExpr_RI5        ( ISelEnv* env, IRExpr* e );
228 
229 static ARMCondCode iselCondCode_wrk       ( ISelEnv* env, IRExpr* e );
230 static ARMCondCode iselCondCode           ( ISelEnv* env, IRExpr* e );
231 
232 static HReg        iselIntExpr_R_wrk      ( ISelEnv* env, IRExpr* e );
233 static HReg        iselIntExpr_R          ( ISelEnv* env, IRExpr* e );
234 
235 static void        iselInt64Expr_wrk      ( HReg* rHi, HReg* rLo,
236                                             ISelEnv* env, const IRExpr* e );
237 static void        iselInt64Expr          ( HReg* rHi, HReg* rLo,
238                                             ISelEnv* env, const IRExpr* e );
239 
240 static HReg        iselDblExpr_wrk        ( ISelEnv* env, IRExpr* e );
241 static HReg        iselDblExpr            ( ISelEnv* env, IRExpr* e );
242 
243 static HReg        iselFltExpr_wrk        ( ISelEnv* env, IRExpr* e );
244 static HReg        iselFltExpr            ( ISelEnv* env, IRExpr* e );
245 
246 static HReg        iselNeon64Expr_wrk     ( ISelEnv* env, const IRExpr* e );
247 static HReg        iselNeon64Expr         ( ISelEnv* env, const IRExpr* e );
248 
249 static HReg        iselNeonExpr_wrk       ( ISelEnv* env, const IRExpr* e );
250 static HReg        iselNeonExpr           ( ISelEnv* env, const IRExpr* e );
251 
252 /*---------------------------------------------------------*/
253 /*--- ISEL: Misc helpers                                ---*/
254 /*---------------------------------------------------------*/
255 
/* Rotate |x| right by |sh| bit positions.  |sh| must be in the range
   0 .. 31.  A rotation by zero is handled separately so that the
   expression below never shifts by 32. */
static UInt ROR32 ( UInt x, UInt sh ) {
   vassert(sh >= 0 && sh < 32);
   return (sh == 0) ? x
                    : ((x << (32-sh)) | (x >> sh));
}
263 
264 /* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
265    form, and if so return the components. */
fitsIn8x4(UInt * u8,UInt * u4,UInt u)266 static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
267 {
268    UInt i;
269    for (i = 0; i < 16; i++) {
270       if (0 == (u & 0xFFFFFF00)) {
271          *u8 = u;
272          *u4 = i;
273          return True;
274       }
275       u = ROR32(u, 30);
276    }
277    vassert(i == 16);
278    return False;
279 }
280 
281 /* Make a int reg-reg move. */
mk_iMOVds_RR(HReg dst,HReg src)282 static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
283 {
284    vassert(hregClass(src) == HRcInt32);
285    vassert(hregClass(dst) == HRcInt32);
286    return ARMInstr_Mov(dst, ARMRI84_R(src));
287 }
288 
289 /* Set the VFP unit's rounding mode to default (round to nearest). */
set_VFP_rounding_default(ISelEnv * env)290 static void set_VFP_rounding_default ( ISelEnv* env )
291 {
292    /* mov rTmp, #DEFAULT_FPSCR
293       fmxr fpscr, rTmp
294    */
295    HReg rTmp = newVRegI(env);
296    addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
297    addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
298 }
299 
300 /* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
301    expression denoting a value in the range 0 .. 3, indicating a round
302    mode encoded as per type IRRoundingMode.  Set FPSCR to have the
303    same rounding.
304 */
305 static
set_VFP_rounding_mode(ISelEnv * env,IRExpr * mode)306 void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
307 {
308    /* This isn't simple, because 'mode' carries an IR rounding
309       encoding, and we need to translate that to an ARMvfp one:
310       The IR encoding:
311          00  to nearest (the default)
312          10  to +infinity
313          01  to -infinity
314          11  to zero
315       The ARMvfp encoding:
316          00  to nearest
317          01  to +infinity
318          10  to -infinity
319          11  to zero
320       Easy enough to do; just swap the two bits.
321    */
322    HReg irrm = iselIntExpr_R(env, mode);
323    HReg tL   = newVRegI(env);
324    HReg tR   = newVRegI(env);
325    HReg t3   = newVRegI(env);
326    /* tL = irrm << 1;
327       tR = irrm >> 1;  if we're lucky, these will issue together
328       tL &= 2;
329       tR &= 1;         ditto
330       t3 = tL | tR;
331       t3 <<= 22;
332       fmxr fpscr, t3
333    */
334    addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
335    addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
336    addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
337    addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
338    addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
339    addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
340    addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
341 }
342 
343 
344 /*---------------------------------------------------------*/
345 /*--- ISEL: Function call helpers                       ---*/
346 /*---------------------------------------------------------*/
347 
348 /* Used only in doHelperCall.  See big comment in doHelperCall re
349    handling of register-parameter args.  This function figures out
350    whether evaluation of an expression might require use of a fixed
351    register.  If in doubt return True (safe but suboptimal).
352 */
353 static
mightRequireFixedRegs(IRExpr * e)354 Bool mightRequireFixedRegs ( IRExpr* e )
355 {
356    if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
357       // These are always "safe" -- either a copy of r13(sp) in some
358       // arbitrary vreg, or a copy of r8, respectively.
359       return False;
360    }
361    /* Else it's a "normal" expression. */
362    switch (e->tag) {
363    case Iex_RdTmp: case Iex_Const: case Iex_Get:
364       return False;
365    default:
366       return True;
367    }
368 }
369 
370 
371 static
doHelperCallWithArgsOnStack(UInt * stackAdjustAfterCall,RetLoc * retloc,ISelEnv * env,IRExpr * guard,IRCallee * cee,IRType retTy,IRExpr ** args)372 Bool doHelperCallWithArgsOnStack ( /*OUT*/UInt*   stackAdjustAfterCall,
373                                    /*OUT*/RetLoc* retloc,
374                                    ISelEnv* env,
375                                    IRExpr* guard,
376                                    IRCallee* cee, IRType retTy, IRExpr** args )
377 {
378    /* This function deals just with the case where the arg sequence is:
379       VECRET followed by between 4 and 12 Ity_I32 values.  So far no other
380       cases are necessary or supported. */
381 
382    /* Check this matches the required format. */
383    if (args[0] == NULL || args[0]->tag != Iex_VECRET)
384       goto no_match;
385 
386    UInt i;
387    UInt n_real_args = 0;
388    for (i = 1; args[i]; i++) {
389       IRExpr* arg = args[i];
390       if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg)))
391          goto no_match;
392       IRType argTy = typeOfIRExpr(env->type_env, arg);
393       if (UNLIKELY(argTy != Ity_I32))
394          goto no_match;
395       n_real_args++;
396    }
397 
398    /* We expect to pass at least some args on the stack. */
399    if (n_real_args <= 3)
400       goto no_match;
401 
402    /* But not too many. */
403    if (n_real_args > 12)
404       goto no_match;
405 
406    /* General rules for a call:
407 
408       Args 1 .. 4 go in R0 .. R3.  The rest are pushed R to L on the
409       stack; that is, arg 5 is at the lowest address, arg 6 at the
410       next lowest, etc.
411 
412       The stack is to be kept 8 aligned.
413 
414       It appears (for unclear reasons) that the highest 3 words made
415       available when moving SP downwards are not to be used.  For
416       example, if 5 args are to go on the stack, then SP must be moved
417       down 32 bytes, and the area at SP+20 .. SP+31 is not to be used
418       by the caller.
419    */
420 
421    /* For this particular case, we use the following layout:
422 
423         ------ original SP
424         112 bytes
425         ------
426         return value
427         ------ original SP - 128
428         space
429         args words, between 1 and 11
430         ------ new SP = original_SP - 256
431 
432       Using 256 bytes is overkill, but it is simple and good enough.
433    */
434 
435    /* This should really be
436         HReg argVRegs[n_real_args];
437       but that makes it impossible to do 'goto's forward past.
438       Hence the following kludge. */
439    vassert(n_real_args <= 12);
440    HReg argVRegs[12];
441    for (i = 0; i < 12; i++)
442       argVRegs[i] = INVALID_HREG;
443 
444    /* Compute args into vregs. */
445    for (i = 0; i < n_real_args; i++) {
446       argVRegs[i] = iselIntExpr_R(env, args[i+1]);
447    }
448 
449    /* Now we can compute the condition.  We can't do it earlier
450       because the argument computations could trash the condition
451       codes.  Be a bit clever to handle the common case where the
452       guard is 1:Bit. */
453    ARMCondCode cc = ARMcc_AL;
454    if (guard) {
455       if (guard->tag == Iex_Const
456           && guard->Iex.Const.con->tag == Ico_U1
457           && guard->Iex.Const.con->Ico.U1 == True) {
458          /* unconditional -- do nothing */
459       } else {
460          goto no_match; //ATC
461          cc = iselCondCode( env, guard );
462       }
463    }
464 
465    HReg r0 = hregARM_R0();
466    HReg sp = hregARM_R13();
467 
468    ARMRI84* c256 = ARMRI84_I84(64, 15); // 64 `ror` (15 * 2)
469 
470    addInstr(env, ARMInstr_Alu(ARMalu_SUB, r0, sp, ARMRI84_I84(128, 0)));
471 
472    addInstr(env, mk_iMOVds_RR(hregARM_R1(), argVRegs[0]));
473    addInstr(env, mk_iMOVds_RR(hregARM_R2(), argVRegs[1]));
474    addInstr(env, mk_iMOVds_RR(hregARM_R3(), argVRegs[2]));
475 
476    addInstr(env, ARMInstr_Alu(ARMalu_SUB, sp, sp, c256));
477 
478    for (i = 3; i < n_real_args; i++) {
479       addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/, argVRegs[i],
480                                     ARMAMode1_RI(sp, (i-3) * 4)));
481    }
482 
483    vassert(*stackAdjustAfterCall == 0);
484    vassert(is_RetLoc_INVALID(*retloc));
485 
486    *stackAdjustAfterCall = 256;
487    *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 128);
488 
489    Addr32 target = (Addr)cee->addr;
490    addInstr(env, ARMInstr_Call( cc, target, 4, *retloc ));
491 
492    return True; /* success */
493 
494   no_match:
495    return False;
496 }
497 
498 
499 /* Do a complete function call.  |guard| is a Ity_Bit expression
500    indicating whether or not the call happens.  If guard==NULL, the
501    call is unconditional.  |retloc| is set to indicate where the
502    return value is after the call.  The caller (of this fn) must
503    generate code to add |stackAdjustAfterCall| to the stack pointer
504    after the call is done.  Returns True iff it managed to handle this
505    combination of arg/return types, else returns False. */
506 
507 static
doHelperCall(UInt * stackAdjustAfterCall,RetLoc * retloc,ISelEnv * env,IRExpr * guard,IRCallee * cee,IRType retTy,IRExpr ** args)508 Bool doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
509                     /*OUT*/RetLoc* retloc,
510                     ISelEnv* env,
511                     IRExpr* guard,
512                     IRCallee* cee, IRType retTy, IRExpr** args )
513 {
514    ARMCondCode cc;
515    HReg        argregs[ARM_N_ARGREGS];
516    HReg        tmpregs[ARM_N_ARGREGS];
517    Bool        go_fast;
518    Int         n_args, i, nextArgReg;
519    Addr32      target;
520 
521    vassert(ARM_N_ARGREGS == 4);
522 
523    /* Set default returns.  We'll update them later if needed. */
524    *stackAdjustAfterCall = 0;
525    *retloc               = mk_RetLoc_INVALID();
526 
527    /* These are used for cross-checking that IR-level constraints on
528       the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
529    UInt nVECRETs = 0;
530    UInt nGSPTRs  = 0;
531 
532    /* Marshal args for a call and do the call.
533 
534       This function only deals with a tiny set of possibilities, which
535       cover all helpers in practice.  The restrictions are that only
536       arguments in registers are supported, hence only ARM_N_REGPARMS
537       x 32 integer bits in total can be passed.  In fact the only
538       supported arg types are I32 and I64.
539 
540       The return type can be I{64,32} or V128.  In the V128 case, it
541       is expected that |args| will contain the special node
542       IRExpr_VECRET(), in which case this routine generates code to
543       allocate space on the stack for the vector return value.  Since
544       we are not passing any scalars on the stack, it is enough to
545       preallocate the return space before marshalling any arguments,
546       in this case.
547 
548       |args| may also contain IRExpr_GSPTR(), in which case the
549       value in r8 is passed as the corresponding argument.
550 
551       Generating code which is both efficient and correct when
552       parameters are to be passed in registers is difficult, for the
553       reasons elaborated in detail in comments attached to
554       doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
555       of the method described in those comments.
556 
557       The problem is split into two cases: the fast scheme and the
558       slow scheme.  In the fast scheme, arguments are computed
559       directly into the target (real) registers.  This is only safe
560       when we can be sure that computation of each argument will not
561       trash any real registers set by computation of any other
562       argument.
563 
564       In the slow scheme, all args are first computed into vregs, and
565       once they are all done, they are moved to the relevant real
566       regs.  This always gives correct code, but it also gives a bunch
567       of vreg-to-rreg moves which are usually redundant but are hard
568       for the register allocator to get rid of.
569 
570       To decide which scheme to use, all argument expressions are
571       first examined.  If they are all so simple that it is clear they
572       will be evaluated without use of any fixed registers, use the
573       fast scheme, else use the slow scheme.  Note also that only
574       unconditional calls may use the fast scheme, since having to
575       compute a condition expression could itself trash real
576       registers.
577 
578       Note this requires being able to examine an expression and
579       determine whether or not evaluation of it might use a fixed
580       register.  That requires knowledge of how the rest of this insn
581       selector works.  Currently just the following 3 are regarded as
582       safe -- hopefully they cover the majority of arguments in
583       practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
584    */
585 
586    /* Note that the cee->regparms field is meaningless on ARM hosts
587       (since there is only one calling convention) and so we always
588       ignore it. */
589 
590    n_args = 0;
591    for (i = 0; args[i]; i++) {
592       IRExpr* arg = args[i];
593       if (UNLIKELY(arg->tag == Iex_VECRET)) {
594          nVECRETs++;
595       } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
596          nGSPTRs++;
597       }
598       n_args++;
599    }
600 
601    /* If there are more than 4 args, we are going to have to pass
602       some via memory.  Use a different function to (possibly) deal with
603       that; dealing with it here is too complex. */
604    if (n_args > ARM_N_ARGREGS) {
605       return doHelperCallWithArgsOnStack(stackAdjustAfterCall, retloc,
606                                          env, guard, cee, retTy, args );
607 
608    }
609 
610    /* After this point we make no attempt to pass args on the stack,
611       and just give up if that case (which is OK because it never
612       happens).  Even if there are for example only 3 args, it might
613       still be necessary to pass some of them on the stack if for example
614       two or more of them are 64-bit integers. */
615 
616    argregs[0] = hregARM_R0();
617    argregs[1] = hregARM_R1();
618    argregs[2] = hregARM_R2();
619    argregs[3] = hregARM_R3();
620 
621    tmpregs[0] = tmpregs[1] = tmpregs[2] =
622    tmpregs[3] = INVALID_HREG;
623 
624    /* First decide which scheme (slow or fast) is to be used.  First
625       assume the fast scheme, and select slow if any contraindications
626       (wow) appear. */
627 
628    go_fast = True;
629 
630    if (guard) {
631       if (guard->tag == Iex_Const
632           && guard->Iex.Const.con->tag == Ico_U1
633           && guard->Iex.Const.con->Ico.U1 == True) {
634          /* unconditional */
635       } else {
636          /* Not manifestly unconditional -- be conservative. */
637          go_fast = False;
638       }
639    }
640 
641    if (go_fast) {
642       for (i = 0; i < n_args; i++) {
643          if (mightRequireFixedRegs(args[i])) {
644             go_fast = False;
645             break;
646          }
647       }
648    }
649 
650    if (go_fast) {
651       if (retTy == Ity_V128 || retTy == Ity_V256)
652          go_fast = False;
653    }
654 
655    /* At this point the scheme to use has been established.  Generate
656       code to get the arg values into the argument rregs.  If we run
657       out of arg regs, give up. */
658 
659    if (go_fast) {
660 
661       /* FAST SCHEME */
662       nextArgReg = 0;
663 
664       for (i = 0; i < n_args; i++) {
665          IRExpr* arg = args[i];
666 
667          IRType  aTy = Ity_INVALID;
668          if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
669             aTy = typeOfIRExpr(env->type_env, arg);
670 
671          if (nextArgReg >= ARM_N_ARGREGS)
672             return False; /* out of argregs */
673 
674          if (aTy == Ity_I32) {
675             addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
676                                         iselIntExpr_R(env, arg) ));
677             nextArgReg++;
678          }
679          else if (aTy == Ity_I64) {
680             /* 64-bit args must be passed in an a reg-pair of the form
681                n:n+1, where n is even.  Hence either r0:r1 or r2:r3.
682                On a little-endian host, the less significant word is
683                passed in the lower-numbered register. */
684             if (nextArgReg & 1) {
685                if (nextArgReg >= ARM_N_ARGREGS)
686                   return False; /* out of argregs */
687                addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
688                nextArgReg++;
689             }
690             if (nextArgReg >= ARM_N_ARGREGS)
691                return False; /* out of argregs */
692             HReg raHi, raLo;
693             iselInt64Expr(&raHi, &raLo, env, arg);
694             addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
695             nextArgReg++;
696             addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
697             nextArgReg++;
698          }
699          else if (arg->tag == Iex_GSPTR) {
700             vassert(0); //ATC
701             addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
702                                         hregARM_R8() ));
703             nextArgReg++;
704          }
705          else if (arg->tag == Iex_VECRET) {
706             // If this happens, it denotes ill-formed IR
707             vassert(0);
708          }
709          else
710             return False; /* unhandled arg type */
711       }
712 
713       /* Fast scheme only applies for unconditional calls.  Hence: */
714       cc = ARMcc_AL;
715 
716    } else {
717 
718       /* SLOW SCHEME; move via temporaries */
719       nextArgReg = 0;
720 
721       for (i = 0; i < n_args; i++) {
722          IRExpr* arg = args[i];
723 
724          IRType  aTy = Ity_INVALID;
725          if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
726             aTy  = typeOfIRExpr(env->type_env, arg);
727 
728          if (nextArgReg >= ARM_N_ARGREGS)
729             return False; /* out of argregs */
730 
731          if (aTy == Ity_I32) {
732             tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
733             nextArgReg++;
734          }
735          else if (aTy == Ity_I64) {
736             /* Same comment applies as in the Fast-scheme case. */
737             if (nextArgReg & 1)
738                nextArgReg++;
739             if (nextArgReg + 1 >= ARM_N_ARGREGS)
740                return False; /* out of argregs */
741             HReg raHi, raLo;
742             iselInt64Expr(&raHi, &raLo, env, args[i]);
743             tmpregs[nextArgReg] = raLo;
744             nextArgReg++;
745             tmpregs[nextArgReg] = raHi;
746             nextArgReg++;
747          }
748          else if (arg->tag == Iex_GSPTR) {
749             vassert(0); //ATC
750             tmpregs[nextArgReg] = hregARM_R8();
751             nextArgReg++;
752          }
753          else if (arg->tag == Iex_VECRET) {
754             // If this happens, it denotes ill-formed IR
755             vassert(0);
756          }
757          else
758             return False; /* unhandled arg type */
759       }
760 
761       /* Now we can compute the condition.  We can't do it earlier
762          because the argument computations could trash the condition
763          codes.  Be a bit clever to handle the common case where the
764          guard is 1:Bit. */
765       cc = ARMcc_AL;
766       if (guard) {
767          if (guard->tag == Iex_Const
768              && guard->Iex.Const.con->tag == Ico_U1
769              && guard->Iex.Const.con->Ico.U1 == True) {
770             /* unconditional -- do nothing */
771          } else {
772             cc = iselCondCode( env, guard );
773          }
774       }
775 
776       /* Move the args to their final destinations. */
777       for (i = 0; i < nextArgReg; i++) {
778          if (hregIsInvalid(tmpregs[i])) { // Skip invalid regs
779             addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
780             continue;
781          }
782          /* None of these insns, including any spill code that might
783             be generated, may alter the condition codes. */
784          addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
785       }
786 
787    }
788 
789    /* Should be assured by checks above */
790    vassert(nextArgReg <= ARM_N_ARGREGS);
791 
792    /* Do final checks, set the return values, and generate the call
793       instruction proper. */
794    vassert(nGSPTRs == 0 || nGSPTRs == 1);
795    vassert(nVECRETs == ((retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0));
796    vassert(*stackAdjustAfterCall == 0);
797    vassert(is_RetLoc_INVALID(*retloc));
798    switch (retTy) {
799       case Ity_INVALID:
800          /* Function doesn't return a value. */
801          *retloc = mk_RetLoc_simple(RLPri_None);
802          break;
803       case Ity_I64:
804          *retloc = mk_RetLoc_simple(RLPri_2Int);
805          break;
806       case Ity_I32: case Ity_I16: case Ity_I8:
807          *retloc = mk_RetLoc_simple(RLPri_Int);
808          break;
809       case Ity_V128:
810          vassert(0); // ATC
811          *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
812          *stackAdjustAfterCall = 16;
813          break;
814       case Ity_V256:
815          vassert(0); // ATC
816          *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
817          *stackAdjustAfterCall = 32;
818          break;
819       default:
820          /* IR can denote other possible return types, but we don't
821             handle those here. */
822          vassert(0);
823    }
824 
825    /* Finally, generate the call itself.  This needs the *retloc value
826       set in the switch above, which is why it's at the end. */
827 
828    /* nextArgReg doles out argument registers.  Since these are
829       assigned in the order r0, r1, r2, r3, its numeric value at this
830       point, which must be between 0 and 4 inclusive, is going to be
831       equal to the number of arg regs in use for the call.  Hence bake
832       that number into the call (we'll need to know it when doing
833       register allocation, to know what regs the call reads.)
834 
835       There is a bit of a twist -- harmless but worth recording.
836       Suppose the arg types are (Ity_I32, Ity_I64).  Then we will have
837       the first arg in r0 and the second in r3:r2, but r1 isn't used.
838       We nevertheless have nextArgReg==4 and bake that into the call
      instruction.  This will mean the register allocator will believe
840       this insn reads r1 when in fact it doesn't.  But that's
841       harmless; it just artificially extends the live range of r1
842       unnecessarily.  The best fix would be to put into the
843       instruction, a bitmask indicating which of r0/1/2/3 carry live
844       values.  But that's too much hassle. */
845 
846    target = (Addr)cee->addr;
847    addInstr(env, ARMInstr_Call( cc, target, nextArgReg, *retloc ));
848 
849    return True; /* success */
850 }
851 
852 
853 /*---------------------------------------------------------*/
854 /*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
855 /*---------------------------------------------------------*/
856 
857 /* Select insns for an integer-typed expression, and add them to the
858    code list.  Return a reg holding the result.  This reg will be a
859    virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
860    want to modify it, ask for a new vreg, copy it in there, and modify
861    the copy.  The register allocator will do its best to map both
862    vregs to the same real register, so the copies will often disappear
863    later in the game.
864 
865    This should handle expressions of 32, 16 and 8-bit type.  All
866    results are returned in a 32-bit register.  For 16- and 8-bit
867    expressions, the upper 16/24 bits are arbitrary, so you should mask
868    or sign extend partial values if necessary.
869 */
870 
871 /* --------------------- AMode1 --------------------- */
872 
873 /* Return an AMode1 which computes the value of the specified
874    expression, possibly also adding insns to the code list as a
875    result.  The expression may only be a 32-bit one.
876 */
877 
sane_AMode1(ARMAMode1 * am)878 static Bool sane_AMode1 ( ARMAMode1* am )
879 {
880    switch (am->tag) {
881       case ARMam1_RI:
882          return
883             toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
884                     && (hregIsVirtual(am->ARMam1.RI.reg)
885                         || sameHReg(am->ARMam1.RI.reg, hregARM_R8()))
886                     && am->ARMam1.RI.simm13 >= -4095
887                     && am->ARMam1.RI.simm13 <= 4095 );
888       case ARMam1_RRS:
889          return
890             toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
891                     && hregIsVirtual(am->ARMam1.RRS.base)
892                     && hregClass(am->ARMam1.RRS.index) == HRcInt32
893                     && hregIsVirtual(am->ARMam1.RRS.index)
894                     && am->ARMam1.RRS.shift >= 0
895                     && am->ARMam1.RRS.shift <= 3 );
896       default:
897          vpanic("sane_AMode: unknown ARM AMode1 tag");
898    }
899 }
900 
iselIntExpr_AMode1(ISelEnv * env,IRExpr * e)901 static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
902 {
903    ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
904    vassert(sane_AMode1(am));
905    return am;
906 }
907 
iselIntExpr_AMode1_wrk(ISelEnv * env,IRExpr * e)908 static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
909 {
910    IRType ty = typeOfIRExpr(env->type_env,e);
911    vassert(ty == Ity_I32);
912 
913    /* FIXME: add RRS matching */
914 
915    /* {Add32,Sub32}(expr,simm13) */
916    if (e->tag == Iex_Binop
917        && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
918        && e->Iex.Binop.arg2->tag == Iex_Const
919        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
920       Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
921       if (simm >= -4095 && simm <= 4095) {
922          HReg reg;
923          if (e->Iex.Binop.op == Iop_Sub32)
924             simm = -simm;
925          reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
926          return ARMAMode1_RI(reg, simm);
927       }
928    }
929 
930    /* Doesn't match anything in particular.  Generate it into
931       a register and use that. */
932    {
933       HReg reg = iselIntExpr_R(env, e);
934       return ARMAMode1_RI(reg, 0);
935    }
936 
937 }
938 
939 
940 /* --------------------- AMode2 --------------------- */
941 
942 /* Return an AMode2 which computes the value of the specified
943    expression, possibly also adding insns to the code list as a
944    result.  The expression may only be a 32-bit one.
945 */
946 
sane_AMode2(ARMAMode2 * am)947 static Bool sane_AMode2 ( ARMAMode2* am )
948 {
949    switch (am->tag) {
950       case ARMam2_RI:
951          return
952             toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
953                     && hregIsVirtual(am->ARMam2.RI.reg)
954                     && am->ARMam2.RI.simm9 >= -255
955                     && am->ARMam2.RI.simm9 <= 255 );
956       case ARMam2_RR:
957          return
958             toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
959                     && hregIsVirtual(am->ARMam2.RR.base)
960                     && hregClass(am->ARMam2.RR.index) == HRcInt32
961                     && hregIsVirtual(am->ARMam2.RR.index) );
962       default:
963          vpanic("sane_AMode: unknown ARM AMode2 tag");
964    }
965 }
966 
iselIntExpr_AMode2(ISelEnv * env,IRExpr * e)967 static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
968 {
969    ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
970    vassert(sane_AMode2(am));
971    return am;
972 }
973 
iselIntExpr_AMode2_wrk(ISelEnv * env,IRExpr * e)974 static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
975 {
976    IRType ty = typeOfIRExpr(env->type_env,e);
977    vassert(ty == Ity_I32);
978 
979    /* FIXME: add RR matching */
980 
981    /* {Add32,Sub32}(expr,simm8) */
982    if (e->tag == Iex_Binop
983        && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
984        && e->Iex.Binop.arg2->tag == Iex_Const
985        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
986       Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
987       if (simm >= -255 && simm <= 255) {
988          HReg reg;
989          if (e->Iex.Binop.op == Iop_Sub32)
990             simm = -simm;
991          reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
992          return ARMAMode2_RI(reg, simm);
993       }
994    }
995 
996    /* Doesn't match anything in particular.  Generate it into
997       a register and use that. */
998    {
999       HReg reg = iselIntExpr_R(env, e);
1000       return ARMAMode2_RI(reg, 0);
1001    }
1002 
1003 }
1004 
1005 
1006 /* --------------------- AModeV --------------------- */
1007 
1008 /* Return an AModeV which computes the value of the specified
1009    expression, possibly also adding insns to the code list as a
1010    result.  The expression may only be a 32-bit one.
1011 */
1012 
sane_AModeV(ARMAModeV * am)1013 static Bool sane_AModeV ( ARMAModeV* am )
1014 {
1015   return toBool( hregClass(am->reg) == HRcInt32
1016                  && hregIsVirtual(am->reg)
1017                  && am->simm11 >= -1020 && am->simm11 <= 1020
1018                  && 0 == (am->simm11 & 3) );
1019 }
1020 
iselIntExpr_AModeV(ISelEnv * env,IRExpr * e)1021 static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
1022 {
1023    ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
1024    vassert(sane_AModeV(am));
1025    return am;
1026 }
1027 
iselIntExpr_AModeV_wrk(ISelEnv * env,IRExpr * e)1028 static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
1029 {
1030    IRType ty = typeOfIRExpr(env->type_env,e);
1031    vassert(ty == Ity_I32);
1032 
1033    /* {Add32,Sub32}(expr, simm8 << 2) */
1034    if (e->tag == Iex_Binop
1035        && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
1036        && e->Iex.Binop.arg2->tag == Iex_Const
1037        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
1038       Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1039       if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
1040          HReg reg;
1041          if (e->Iex.Binop.op == Iop_Sub32)
1042             simm = -simm;
1043          reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
1044          return mkARMAModeV(reg, simm);
1045       }
1046    }
1047 
1048    /* Doesn't match anything in particular.  Generate it into
1049       a register and use that. */
1050    {
1051       HReg reg = iselIntExpr_R(env, e);
1052       return mkARMAModeV(reg, 0);
1053    }
1054 
1055 }
1056 
1057 /* -------------------- AModeN -------------------- */
1058 
iselIntExpr_AModeN(ISelEnv * env,IRExpr * e)1059 static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
1060 {
1061    return iselIntExpr_AModeN_wrk(env, e);
1062 }
1063 
iselIntExpr_AModeN_wrk(ISelEnv * env,IRExpr * e)1064 static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
1065 {
1066    HReg reg = iselIntExpr_R(env, e);
1067    return mkARMAModeN_R(reg);
1068 }
1069 
1070 
1071 /* --------------------- RI84 --------------------- */
1072 
1073 /* Select instructions to generate 'e' into a RI84.  If mayInv is
1074    true, then the caller will also accept an I84 form that denotes
1075    'not e'.  In this case didInv may not be NULL, and *didInv is set
1076    to True.  This complication is so as to allow generation of an RI84
1077    which is suitable for use in either an AND or BIC instruction,
1078    without knowing (before this call) which one.
1079 */
iselIntExpr_RI84(Bool * didInv,Bool mayInv,ISelEnv * env,IRExpr * e)1080 static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
1081                                    ISelEnv* env, IRExpr* e )
1082 {
1083    ARMRI84* ri;
1084    if (mayInv)
1085       vassert(didInv != NULL);
1086    ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
1087    /* sanity checks ... */
1088    switch (ri->tag) {
1089       case ARMri84_I84:
1090          return ri;
1091       case ARMri84_R:
1092          vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
1093          vassert(hregIsVirtual(ri->ARMri84.R.reg));
1094          return ri;
1095       default:
1096          vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
1097    }
1098 }
1099 
1100 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RI84_wrk(Bool * didInv,Bool mayInv,ISelEnv * env,IRExpr * e)1101 static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
1102                                        ISelEnv* env, IRExpr* e )
1103 {
1104    IRType ty = typeOfIRExpr(env->type_env,e);
1105    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1106 
1107    if (didInv) *didInv = False;
1108 
1109    /* special case: immediate */
1110    if (e->tag == Iex_Const) {
1111       UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
1112       switch (e->Iex.Const.con->tag) {
1113          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1114          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1115          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1116          default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
1117       }
1118       if (fitsIn8x4(&u8, &u4, u)) {
1119          return ARMRI84_I84( (UShort)u8, (UShort)u4 );
1120       }
1121       if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
1122          vassert(didInv);
1123          *didInv = True;
1124          return ARMRI84_I84( (UShort)u8, (UShort)u4 );
1125       }
1126       /* else fail, fall through to default case */
1127    }
1128 
1129    /* default case: calculate into a register and return that */
1130    {
1131       HReg r = iselIntExpr_R ( env, e );
1132       return ARMRI84_R(r);
1133    }
1134 }
1135 
1136 
1137 /* --------------------- RI5 --------------------- */
1138 
1139 /* Select instructions to generate 'e' into a RI5. */
1140 
iselIntExpr_RI5(ISelEnv * env,IRExpr * e)1141 static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
1142 {
1143    ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
1144    /* sanity checks ... */
1145    switch (ri->tag) {
1146       case ARMri5_I5:
1147          return ri;
1148       case ARMri5_R:
1149          vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
1150          vassert(hregIsVirtual(ri->ARMri5.R.reg));
1151          return ri;
1152       default:
1153          vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
1154    }
1155 }
1156 
1157 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RI5_wrk(ISelEnv * env,IRExpr * e)1158 static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
1159 {
1160    IRType ty = typeOfIRExpr(env->type_env,e);
1161    vassert(ty == Ity_I32 || ty == Ity_I8);
1162 
1163    /* special case: immediate */
1164    if (e->tag == Iex_Const) {
1165       UInt u; /* both invalid */
1166       switch (e->Iex.Const.con->tag) {
1167          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1168          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1169          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1170          default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
1171       }
1172       if (u >= 1 && u <= 31) {
1173          return ARMRI5_I5(u);
1174       }
1175       /* else fail, fall through to default case */
1176    }
1177 
1178    /* default case: calculate into a register and return that */
1179    {
1180       HReg r = iselIntExpr_R ( env, e );
1181       return ARMRI5_R(r);
1182    }
1183 }
1184 
1185 
1186 /* ------------------- CondCode ------------------- */
1187 
1188 /* Generate code to evaluated a bit-typed expression, returning the
1189    condition code which would correspond when the expression would
1190    notionally have returned 1. */
1191 
iselCondCode(ISelEnv * env,IRExpr * e)1192 static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1193 {
1194    ARMCondCode cc = iselCondCode_wrk(env,e);
1195    vassert(cc != ARMcc_NV);
1196    return cc;
1197 }
1198 
iselCondCode_wrk(ISelEnv * env,IRExpr * e)1199 static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1200 {
1201    vassert(e);
1202    vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1203 
1204    /* var */
1205    if (e->tag == Iex_RdTmp) {
1206       HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1207       /* CmpOrTst doesn't modify rTmp; so this is OK. */
1208       ARMRI84* one  = ARMRI84_I84(1,0);
1209       addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
1210       return ARMcc_NE;
1211    }
1212 
1213    /* Not1(e) */
1214    if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1215       /* Generate code for the arg, and negate the test condition */
1216       return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
1217    }
1218 
1219    /* --- patterns rooted at: 32to1 --- */
1220 
1221    if (e->tag == Iex_Unop
1222        && e->Iex.Unop.op == Iop_32to1) {
1223       HReg     rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1224       ARMRI84* one  = ARMRI84_I84(1,0);
1225       addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
1226       return ARMcc_NE;
1227    }
1228 
1229    /* --- patterns rooted at: CmpNEZ8 --- */
1230 
1231    if (e->tag == Iex_Unop
1232        && e->Iex.Unop.op == Iop_CmpNEZ8) {
1233       HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1234       ARMRI84* xFF  = ARMRI84_I84(0xFF,0);
1235       addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
1236       return ARMcc_NE;
1237    }
1238 
1239    /* --- patterns rooted at: CmpNEZ32 --- */
1240 
1241    if (e->tag == Iex_Unop
1242        && e->Iex.Unop.op == Iop_CmpNEZ32) {
1243       HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1244       ARMRI84* zero = ARMRI84_I84(0,0);
1245       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
1246       return ARMcc_NE;
1247    }
1248 
1249    /* --- patterns rooted at: CmpNEZ64 --- */
1250 
1251    if (e->tag == Iex_Unop
1252        && e->Iex.Unop.op == Iop_CmpNEZ64) {
1253       HReg     tHi, tLo;
1254       HReg     tmp  = newVRegI(env);
1255       ARMRI84* zero = ARMRI84_I84(0,0);
1256       iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
1257       addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
1258       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
1259       return ARMcc_NE;
1260    }
1261 
1262    /* --- Cmp*32*(x,y) --- */
1263    if (e->tag == Iex_Binop
1264        && (e->Iex.Binop.op == Iop_CmpEQ32
1265            || e->Iex.Binop.op == Iop_CmpNE32
1266            || e->Iex.Binop.op == Iop_CmpLT32S
1267            || e->Iex.Binop.op == Iop_CmpLT32U
1268            || e->Iex.Binop.op == Iop_CmpLE32S
1269            || e->Iex.Binop.op == Iop_CmpLE32U)) {
1270       HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1271       ARMRI84* argR = iselIntExpr_RI84(NULL,False,
1272                                        env, e->Iex.Binop.arg2);
1273       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
1274       switch (e->Iex.Binop.op) {
1275          case Iop_CmpEQ32:  return ARMcc_EQ;
1276          case Iop_CmpNE32:  return ARMcc_NE;
1277          case Iop_CmpLT32S: return ARMcc_LT;
1278          case Iop_CmpLT32U: return ARMcc_LO;
1279          case Iop_CmpLE32S: return ARMcc_LE;
1280          case Iop_CmpLE32U: return ARMcc_LS;
1281          default: vpanic("iselCondCode(arm): CmpXX32");
1282       }
1283    }
1284 
1285    /* const */
1286    /* Constant 1:Bit */
1287    if (e->tag == Iex_Const) {
1288       HReg r;
1289       vassert(e->Iex.Const.con->tag == Ico_U1);
1290       vassert(e->Iex.Const.con->Ico.U1 == True
1291               || e->Iex.Const.con->Ico.U1 == False);
1292       r = newVRegI(env);
1293       addInstr(env, ARMInstr_Imm32(r, 0));
1294       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r, ARMRI84_R(r)));
1295       return e->Iex.Const.con->Ico.U1 ? ARMcc_EQ : ARMcc_NE;
1296    }
1297 
1298    // JRS 2013-Jan-03: this seems completely nonsensical
1299    /* --- CasCmpEQ* --- */
1300    /* Ist_Cas has a dummy argument to compare with, so comparison is
1301       always true. */
1302    //if (e->tag == Iex_Binop
1303    //    && (e->Iex.Binop.op == Iop_CasCmpEQ32
1304    //        || e->Iex.Binop.op == Iop_CasCmpEQ16
1305    //        || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1306    //   return ARMcc_AL;
1307    //}
1308 
1309    ppIRExpr(e);
1310    vpanic("iselCondCode");
1311 }
1312 
1313 
1314 /* --------------------- Reg --------------------- */
1315 
iselIntExpr_R(ISelEnv * env,IRExpr * e)1316 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1317 {
1318    HReg r = iselIntExpr_R_wrk(env, e);
1319    /* sanity checks ... */
1320 #  if 0
1321    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1322 #  endif
1323    vassert(hregClass(r) == HRcInt32);
1324    vassert(hregIsVirtual(r));
1325    return r;
1326 }
1327 
1328 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_R_wrk(ISelEnv * env,IRExpr * e)1329 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1330 {
1331    IRType ty = typeOfIRExpr(env->type_env,e);
1332    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1333 
1334    switch (e->tag) {
1335 
1336    /* --------- TEMP --------- */
1337    case Iex_RdTmp: {
1338       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1339    }
1340 
1341    /* --------- LOAD --------- */
1342    case Iex_Load: {
1343       HReg dst  = newVRegI(env);
1344 
1345       if (e->Iex.Load.end != Iend_LE)
1346          goto irreducible;
1347 
1348       if (ty == Ity_I32) {
1349          ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1350          addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, dst, amode));
1351          return dst;
1352       }
1353       if (ty == Ity_I16) {
1354          ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
1355          addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
1356                                        True/*isLoad*/, False/*!signedLoad*/,
1357                                        dst, amode));
1358          return dst;
1359       }
1360       if (ty == Ity_I8) {
1361          ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1362          addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, True/*isLoad*/, dst, amode));
1363          return dst;
1364       }
1365       break;
1366    }
1367 
1368 //zz   /* --------- TERNARY OP --------- */
1369 //zz   case Iex_Triop: {
1370 //zz      IRTriop *triop = e->Iex.Triop.details;
1371 //zz      /* C3210 flags following FPU partial remainder (fprem), both
1372 //zz         IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1373 //zz      if (triop->op == Iop_PRemC3210F64
1374 //zz          || triop->op == Iop_PRem1C3210F64) {
1375 //zz         HReg junk = newVRegF(env);
1376 //zz         HReg dst  = newVRegI(env);
1377 //zz         HReg srcL = iselDblExpr(env, triop->arg2);
1378 //zz         HReg srcR = iselDblExpr(env, triop->arg3);
1379 //zz         /* XXXROUNDINGFIXME */
1380 //zz         /* set roundingmode here */
1381 //zz         addInstr(env, X86Instr_FpBinary(
1382 //zz                           e->Iex.Binop.op==Iop_PRemC3210F64
1383 //zz                              ? Xfp_PREM : Xfp_PREM1,
1384 //zz                           srcL,srcR,junk
1385 //zz                 ));
1386 //zz         /* The previous pseudo-insn will have left the FPU's C3210
1387 //zz            flags set correctly.  So bag them. */
1388 //zz         addInstr(env, X86Instr_FpStSW_AX());
1389 //zz         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1390 //zz         addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1391 //zz         return dst;
1392 //zz      }
1393 //zz
1394 //zz      break;
1395 //zz   }
1396 
1397    /* --------- BINARY OP --------- */
1398    case Iex_Binop: {
1399 
1400       ARMAluOp   aop = 0; /* invalid */
1401       ARMShiftOp sop = 0; /* invalid */
1402 
1403       /* ADD/SUB/AND/OR/XOR */
1404       switch (e->Iex.Binop.op) {
1405          case Iop_And32: {
1406             Bool     didInv = False;
1407             HReg     dst    = newVRegI(env);
1408             HReg     argL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1409             ARMRI84* argR   = iselIntExpr_RI84(&didInv, True/*mayInv*/,
1410                                                env, e->Iex.Binop.arg2);
1411             addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
1412                                        dst, argL, argR));
1413             return dst;
1414          }
1415          case Iop_Or32:  aop = ARMalu_OR;  goto std_binop;
1416          case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
1417          case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
1418          case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
1419          std_binop: {
1420             HReg     dst  = newVRegI(env);
1421             HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1422             ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
1423                                              env, e->Iex.Binop.arg2);
1424             addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
1425             return dst;
1426          }
1427          default: break;
1428       }
1429 
1430       /* SHL/SHR/SAR */
1431       switch (e->Iex.Binop.op) {
1432          case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
1433          case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
1434          case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
1435          sh_binop: {
1436             HReg    dst  = newVRegI(env);
1437             HReg    argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1438             ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
1439             addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
1440             vassert(ty == Ity_I32); /* else the IR is ill-typed */
1441             return dst;
1442          }
1443          default: break;
1444       }
1445 
1446       /* MUL */
1447       if (e->Iex.Binop.op == Iop_Mul32) {
1448          HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1449          HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1450          HReg dst  = newVRegI(env);
1451          addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1452          addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1453          addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
1454          addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1455          return dst;
1456       }
1457 
1458       /* Handle misc other ops. */
1459 
1460       if (e->Iex.Binop.op == Iop_Max32U) {
1461          HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1462          HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1463          HReg dst  = newVRegI(env);
1464          addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1465                                          ARMRI84_R(argR)));
1466          addInstr(env, mk_iMOVds_RR(dst, argL));
1467          addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1468          return dst;
1469       }
1470 
1471       if (e->Iex.Binop.op == Iop_CmpF64) {
1472          HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
1473          HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
1474          HReg dst = newVRegI(env);
1475          /* Do the compare (FCMPD) and set NZCV in FPSCR.  Then also do
1476             FMSTAT, so we can examine the results directly. */
1477          addInstr(env, ARMInstr_VCmpD(dL, dR));
1478          /* Create in dst, the IRCmpF64Result encoded result. */
1479          addInstr(env, ARMInstr_Imm32(dst, 0));
1480          addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
1481          addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
1482          addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
1483          addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
1484          return dst;
1485       }
1486 
1487       if (e->Iex.Binop.op == Iop_F64toI32S
1488           || e->Iex.Binop.op == Iop_F64toI32U) {
1489          /* Wretched uglyness all round, due to having to deal
1490             with rounding modes.  Oh well. */
1491          /* FIXME: if arg1 is a constant indicating round-to-zero,
1492             then we could skip all this arsing around with FPSCR and
1493             simply emit FTO{S,U}IZD. */
1494          Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
1495          HReg valD  = iselDblExpr(env, e->Iex.Binop.arg2);
1496          set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
1497          /* FTO{S,U}ID valF, valD */
1498          HReg valF = newVRegF(env);
1499          addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
1500                                        valF, valD));
1501          set_VFP_rounding_default(env);
1502          /* VMOV dst, valF */
1503          HReg dst = newVRegI(env);
1504          addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
1505          return dst;
1506       }
1507 
1508       if (e->Iex.Binop.op == Iop_GetElem8x8
1509           || e->Iex.Binop.op == Iop_GetElem16x4
1510           || e->Iex.Binop.op == Iop_GetElem32x2) {
1511          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1512             HReg res = newVRegI(env);
1513             HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
1514             UInt index, size;
1515             if (e->Iex.Binop.arg2->tag != Iex_Const ||
1516                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1517                vpanic("ARM target supports GetElem with constant "
1518                       "second argument only (neon)\n");
1519             }
1520             index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1521             switch (e->Iex.Binop.op) {
1522                case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1523                case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1524                case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1525                default: vassert(0);
1526             }
1527             addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1528                                            mkARMNRS(ARMNRS_Reg, res, 0),
1529                                            mkARMNRS(ARMNRS_Scalar, arg, index),
1530                                            size, False));
1531             return res;
1532          }
1533       }
1534 
1535       if (e->Iex.Binop.op == Iop_GetElem32x2
1536           && e->Iex.Binop.arg2->tag == Iex_Const
1537           && !(env->hwcaps & VEX_HWCAPS_ARM_NEON)) {
1538          /* We may have to do GetElem32x2 on a non-NEON capable
1539             target. */
1540          IRConst* con = e->Iex.Binop.arg2->Iex.Const.con;
1541          vassert(con->tag == Ico_U8); /* else IR is ill-typed */
1542          UInt index = con->Ico.U8;
1543          if (index >= 0 && index <= 1) {
1544             HReg rHi, rLo;
1545             iselInt64Expr(&rHi, &rLo, env, e->Iex.Binop.arg1);
1546             return index == 0 ? rLo : rHi;
1547          }
1548       }
1549 
1550       if (e->Iex.Binop.op == Iop_GetElem8x16
1551           || e->Iex.Binop.op == Iop_GetElem16x8
1552           || e->Iex.Binop.op == Iop_GetElem32x4) {
1553          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1554             HReg res = newVRegI(env);
1555             HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
1556             UInt index, size;
1557             if (e->Iex.Binop.arg2->tag != Iex_Const ||
1558                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1559                vpanic("ARM target supports GetElem with constant "
1560                       "second argument only (neon)\n");
1561             }
1562             index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1563             switch (e->Iex.Binop.op) {
1564                case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1565                case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1566                case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1567                default: vassert(0);
1568             }
1569             addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1570                                            mkARMNRS(ARMNRS_Reg, res, 0),
1571                                            mkARMNRS(ARMNRS_Scalar, arg, index),
1572                                            size, True));
1573             return res;
1574          }
1575       }
1576 
1577       /* All cases involving host-side helper calls. */
1578       void* fn = NULL;
1579       switch (e->Iex.Binop.op) {
1580          case Iop_Add16x2:
1581             fn = &h_generic_calc_Add16x2; break;
1582          case Iop_Sub16x2:
1583             fn = &h_generic_calc_Sub16x2; break;
1584          case Iop_HAdd16Ux2:
1585             fn = &h_generic_calc_HAdd16Ux2; break;
1586          case Iop_HAdd16Sx2:
1587             fn = &h_generic_calc_HAdd16Sx2; break;
1588          case Iop_HSub16Ux2:
1589             fn = &h_generic_calc_HSub16Ux2; break;
1590          case Iop_HSub16Sx2:
1591             fn = &h_generic_calc_HSub16Sx2; break;
1592          case Iop_QAdd16Sx2:
1593             fn = &h_generic_calc_QAdd16Sx2; break;
1594          case Iop_QAdd16Ux2:
1595             fn = &h_generic_calc_QAdd16Ux2; break;
1596          case Iop_QSub16Sx2:
1597             fn = &h_generic_calc_QSub16Sx2; break;
1598          case Iop_Add8x4:
1599             fn = &h_generic_calc_Add8x4; break;
1600          case Iop_Sub8x4:
1601             fn = &h_generic_calc_Sub8x4; break;
1602          case Iop_HAdd8Ux4:
1603             fn = &h_generic_calc_HAdd8Ux4; break;
1604          case Iop_HAdd8Sx4:
1605             fn = &h_generic_calc_HAdd8Sx4; break;
1606          case Iop_HSub8Ux4:
1607             fn = &h_generic_calc_HSub8Ux4; break;
1608          case Iop_HSub8Sx4:
1609             fn = &h_generic_calc_HSub8Sx4; break;
1610          case Iop_QAdd8Sx4:
1611             fn = &h_generic_calc_QAdd8Sx4; break;
1612          case Iop_QAdd8Ux4:
1613             fn = &h_generic_calc_QAdd8Ux4; break;
1614          case Iop_QSub8Sx4:
1615             fn = &h_generic_calc_QSub8Sx4; break;
1616          case Iop_QSub8Ux4:
1617             fn = &h_generic_calc_QSub8Ux4; break;
1618          case Iop_Sad8Ux4:
1619             fn = &h_generic_calc_Sad8Ux4; break;
1620          case Iop_QAdd32S:
1621             fn = &h_generic_calc_QAdd32S; break;
1622          case Iop_QSub32S:
1623             fn = &h_generic_calc_QSub32S; break;
1624          case Iop_QSub16Ux2:
1625             fn = &h_generic_calc_QSub16Ux2; break;
1626          case Iop_DivU32:
1627             fn = &h_calc_udiv32_w_arm_semantics; break;
1628          case Iop_DivS32:
1629             fn = &h_calc_sdiv32_w_arm_semantics; break;
1630          default:
1631             break;
1632       }
1633 
1634       if (fn) {
1635          HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1636          HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1637          HReg res  = newVRegI(env);
1638          addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
1639          addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
1640          addInstr(env, ARMInstr_Call( ARMcc_AL, (Addr)fn,
1641                                       2, mk_RetLoc_simple(RLPri_Int) ));
1642          addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1643          return res;
1644       }
1645 
1646       break;
1647    }
1648 
1649    /* --------- UNARY OP --------- */
1650    case Iex_Unop: {
1651 
1652 //zz      /* 1Uto8(32to1(expr32)) */
1653 //zz      if (e->Iex.Unop.op == Iop_1Uto8) {
1654 //zz         DECLARE_PATTERN(p_32to1_then_1Uto8);
1655 //zz         DEFINE_PATTERN(p_32to1_then_1Uto8,
1656 //zz                        unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1657 //zz         if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1658 //zz            const IRExpr* expr32 = mi.bindee[0];
1659 //zz            HReg dst = newVRegI(env);
1660 //zz            HReg src = iselIntExpr_R(env, expr32);
1661 //zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
1662 //zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
1663 //zz                                          X86RMI_Imm(1), dst));
1664 //zz            return dst;
1665 //zz         }
1666 //zz      }
1667 //zz
1668 //zz      /* 8Uto32(LDle(expr32)) */
1669 //zz      if (e->Iex.Unop.op == Iop_8Uto32) {
1670 //zz         DECLARE_PATTERN(p_LDle8_then_8Uto32);
1671 //zz         DEFINE_PATTERN(p_LDle8_then_8Uto32,
1672 //zz                        unop(Iop_8Uto32,
1673 //zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1674 //zz         if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1675 //zz            HReg dst = newVRegI(env);
1676 //zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1677 //zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1678 //zz            return dst;
1679 //zz         }
1680 //zz      }
1681 //zz
1682 //zz      /* 8Sto32(LDle(expr32)) */
1683 //zz      if (e->Iex.Unop.op == Iop_8Sto32) {
1684 //zz         DECLARE_PATTERN(p_LDle8_then_8Sto32);
1685 //zz         DEFINE_PATTERN(p_LDle8_then_8Sto32,
1686 //zz                        unop(Iop_8Sto32,
1687 //zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1688 //zz         if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1689 //zz            HReg dst = newVRegI(env);
1690 //zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1691 //zz            addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1692 //zz            return dst;
1693 //zz         }
1694 //zz      }
1695 //zz
1696 //zz      /* 16Uto32(LDle(expr32)) */
1697 //zz      if (e->Iex.Unop.op == Iop_16Uto32) {
1698 //zz         DECLARE_PATTERN(p_LDle16_then_16Uto32);
1699 //zz         DEFINE_PATTERN(p_LDle16_then_16Uto32,
1700 //zz                        unop(Iop_16Uto32,
1701 //zz                             IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1702 //zz         if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1703 //zz            HReg dst = newVRegI(env);
1704 //zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1705 //zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1706 //zz            return dst;
1707 //zz         }
1708 //zz      }
1709 //zz
1710 //zz      /* 8Uto32(GET:I8) */
1711 //zz      if (e->Iex.Unop.op == Iop_8Uto32) {
1712 //zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
1713 //zz            HReg      dst;
1714 //zz            X86AMode* amode;
1715 //zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1716 //zz            dst = newVRegI(env);
1717 //zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1718 //zz                                hregX86_EBP());
1719 //zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1720 //zz            return dst;
1721 //zz         }
1722 //zz      }
1723 //zz
1724 //zz      /* 16to32(GET:I16) */
1725 //zz      if (e->Iex.Unop.op == Iop_16Uto32) {
1726 //zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
1727 //zz            HReg      dst;
1728 //zz            X86AMode* amode;
1729 //zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1730 //zz            dst = newVRegI(env);
1731 //zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1732 //zz                                hregX86_EBP());
1733 //zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1734 //zz            return dst;
1735 //zz         }
1736 //zz      }
1737 
1738       switch (e->Iex.Unop.op) {
1739          case Iop_8Uto32: {
1740             HReg dst = newVRegI(env);
1741             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1742             addInstr(env, ARMInstr_Alu(ARMalu_AND,
1743                                        dst, src, ARMRI84_I84(0xFF,0)));
1744             return dst;
1745          }
1746 //zz         case Iop_8Uto16:
1747 //zz         case Iop_8Uto32:
1748 //zz         case Iop_16Uto32: {
1749 //zz            HReg dst = newVRegI(env);
1750 //zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1751 //zz            UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1752 //zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
1753 //zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
1754 //zz                                          X86RMI_Imm(mask), dst));
1755 //zz            return dst;
1756 //zz         }
1757 //zz         case Iop_8Sto16:
1758 //zz         case Iop_8Sto32:
1759          case Iop_16Uto32: {
1760             HReg dst = newVRegI(env);
1761             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1762             ARMRI5* amt = ARMRI5_I5(16);
1763             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1764             addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
1765             return dst;
1766          }
1767          case Iop_8Sto32:
1768          case Iop_16Sto32: {
1769             HReg dst = newVRegI(env);
1770             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1771             ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
1772             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1773             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1774             return dst;
1775          }
1776 //zz         case Iop_Not8:
1777 //zz         case Iop_Not16:
1778          case Iop_Not32: {
1779             HReg dst = newVRegI(env);
1780             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1781             addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
1782             return dst;
1783          }
1784          case Iop_64HIto32: {
1785             HReg rHi, rLo;
1786             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1787             return rHi; /* and abandon rLo .. poor wee thing :-) */
1788          }
1789          case Iop_64to32: {
1790             HReg rHi, rLo;
1791             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1792             return rLo; /* similar stupid comment to the above ... */
1793          }
1794          case Iop_64to8: {
1795             HReg rHi, rLo;
1796             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1797                HReg tHi = newVRegI(env);
1798                HReg tLo = newVRegI(env);
1799                HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
1800                addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1801                rHi = tHi;
1802                rLo = tLo;
1803             } else {
1804                iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1805             }
1806             return rLo;
1807          }
1808 
1809          case Iop_1Uto32:
1810             /* 1Uto32(tmp).  Since I1 values generated into registers
1811                are guaranteed to have value either only zero or one,
1812                we can simply return the value of the register in this
1813                case. */
1814             if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
1815                HReg dst = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
1816                return dst;
1817             }
1818             /* else fall through */
1819          case Iop_1Uto8: {
1820             HReg        dst  = newVRegI(env);
1821             ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1822             addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1823             addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1824             return dst;
1825          }
1826 
1827          case Iop_1Sto32: {
1828             HReg        dst  = newVRegI(env);
1829             ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1830             ARMRI5*     amt  = ARMRI5_I5(31);
1831             /* This is really rough.  We could do much better here;
1832                perhaps mvn{cond} dst, #0 as the second insn?
1833                (same applies to 1Sto64) */
1834             addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1835             addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1836             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1837             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1838             return dst;
1839          }
1840 
1841 
1842 //zz         case Iop_1Sto8:
1843 //zz         case Iop_1Sto16:
1844 //zz         case Iop_1Sto32: {
1845 //zz            /* could do better than this, but for now ... */
1846 //zz            HReg dst         = newVRegI(env);
1847 //zz            X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1848 //zz            addInstr(env, X86Instr_Set32(cond,dst));
1849 //zz            addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1850 //zz            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1851 //zz            return dst;
1852 //zz         }
1853 //zz         case Iop_Ctz32: {
1854 //zz            /* Count trailing zeroes, implemented by x86 'bsfl' */
1855 //zz            HReg dst = newVRegI(env);
1856 //zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1857 //zz            addInstr(env, X86Instr_Bsfr32(True,src,dst));
1858 //zz            return dst;
1859 //zz         }
1860          case Iop_Clz32: {
1861             /* Count leading zeroes; easy on ARM. */
1862             HReg dst = newVRegI(env);
1863             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1864             addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
1865             return dst;
1866          }
1867 
1868          case Iop_CmpwNEZ32: {
1869             HReg dst = newVRegI(env);
1870             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1871             addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1872             addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1873             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
1874             return dst;
1875          }
1876 
1877          case Iop_Left32: {
1878             HReg dst = newVRegI(env);
1879             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1880             addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1881             addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1882             return dst;
1883          }
1884 
1885 //zz         case Iop_V128to32: {
1886 //zz            HReg      dst  = newVRegI(env);
1887 //zz            HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
1888 //zz            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1889 //zz            sub_from_esp(env, 16);
1890 //zz            addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1891 //zz            addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1892 //zz            add_to_esp(env, 16);
1893 //zz            return dst;
1894 //zz         }
1895 //zz
1896          case Iop_ReinterpF32asI32: {
1897             HReg dst = newVRegI(env);
1898             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1899             addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
1900             return dst;
1901          }
1902 
1903 //zz
1904 //zz         case Iop_16to8:
1905          case Iop_32to8:
1906          case Iop_32to16:
1907             /* These are no-ops. */
1908             return iselIntExpr_R(env, e->Iex.Unop.arg);
1909 
1910          default:
1911             break;
1912       }
1913 
1914       /* All Unop cases involving host-side helper calls. */
1915       void* fn = NULL;
1916       switch (e->Iex.Unop.op) {
1917          case Iop_CmpNEZ16x2:
1918             fn = &h_generic_calc_CmpNEZ16x2; break;
1919          case Iop_CmpNEZ8x4:
1920             fn = &h_generic_calc_CmpNEZ8x4; break;
1921          default:
1922             break;
1923       }
1924 
1925       if (fn) {
1926          HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1927          HReg res = newVRegI(env);
1928          addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
1929          addInstr(env, ARMInstr_Call( ARMcc_AL, (Addr)fn,
1930                                       1, mk_RetLoc_simple(RLPri_Int) ));
1931          addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1932          return res;
1933       }
1934 
1935       break;
1936    }
1937 
1938    /* --------- GET --------- */
1939    case Iex_Get: {
1940       if (ty == Ity_I32
1941           && 0 == (e->Iex.Get.offset & 3)
1942           && e->Iex.Get.offset < 4096-4) {
1943          HReg dst = newVRegI(env);
1944          addInstr(env, ARMInstr_LdSt32(
1945                           ARMcc_AL, True/*isLoad*/,
1946                           dst,
1947                           ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
1948          return dst;
1949       }
1950 //zz      if (ty == Ity_I8 || ty == Ity_I16) {
1951 //zz         HReg dst = newVRegI(env);
1952 //zz         addInstr(env, X86Instr_LoadEX(
1953 //zz                          toUChar(ty==Ity_I8 ? 1 : 2),
1954 //zz                          False,
1955 //zz                          X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1956 //zz                          dst));
1957 //zz         return dst;
1958 //zz      }
1959       break;
1960    }
1961 
1962 //zz   case Iex_GetI: {
1963 //zz      X86AMode* am
1964 //zz         = genGuestArrayOffset(
1965 //zz              env, e->Iex.GetI.descr,
1966 //zz                   e->Iex.GetI.ix, e->Iex.GetI.bias );
1967 //zz      HReg dst = newVRegI(env);
1968 //zz      if (ty == Ity_I8) {
1969 //zz         addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1970 //zz         return dst;
1971 //zz      }
1972 //zz      if (ty == Ity_I32) {
1973 //zz         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1974 //zz         return dst;
1975 //zz      }
1976 //zz      break;
1977 //zz   }
1978 
1979    /* --------- CCALL --------- */
1980    case Iex_CCall: {
1981       HReg    dst = newVRegI(env);
1982       vassert(ty == e->Iex.CCall.retty);
1983 
1984       /* be very restrictive for now.  Only 32/64-bit ints allowed for
1985          args, and 32 bits for return type.  Don't forget to change
1986          the RetLoc if more types are allowed in future. */
1987       if (e->Iex.CCall.retty != Ity_I32)
1988          goto irreducible;
1989 
1990       /* Marshal args, do the call, clear stack. */
1991       UInt   addToSp = 0;
1992       RetLoc rloc    = mk_RetLoc_INVALID();
1993       Bool   ok      = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1994                                      e->Iex.CCall.cee, e->Iex.CCall.retty,
1995                                      e->Iex.CCall.args );
1996       /* */
1997       if (ok) {
1998          vassert(is_sane_RetLoc(rloc));
1999          vassert(rloc.pri == RLPri_Int);
2000          vassert(addToSp == 0);
2001          addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
2002          return dst;
2003       }
2004       /* else fall through; will hit the irreducible: label */
2005    }
2006 
2007    /* --------- LITERAL --------- */
2008    /* 32 literals */
2009    case Iex_Const: {
2010       UInt u   = 0;
2011       HReg dst = newVRegI(env);
2012       switch (e->Iex.Const.con->tag) {
2013          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
2014          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
2015          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
2016          default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
2017       }
2018       addInstr(env, ARMInstr_Imm32(dst, u));
2019       return dst;
2020    }
2021 
2022    /* --------- MULTIPLEX --------- */
2023    case Iex_ITE: { // VFD
2024       /* ITE(ccexpr, iftrue, iffalse) */
2025       if (ty == Ity_I32) {
2026          ARMCondCode cc;
2027          HReg     r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
2028          ARMRI84* r0  = iselIntExpr_RI84(NULL, False, env, e->Iex.ITE.iffalse);
2029          HReg     dst = newVRegI(env);
2030          addInstr(env, mk_iMOVds_RR(dst, r1));
2031          cc = iselCondCode(env, e->Iex.ITE.cond);
2032          addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
2033          return dst;
2034       }
2035       break;
2036    }
2037 
2038    default:
2039    break;
2040    } /* switch (e->tag) */
2041 
2042    /* We get here if no pattern matched. */
2043   irreducible:
2044    ppIRExpr(e);
2045    vpanic("iselIntExpr_R: cannot reduce tree");
2046 }
2047 
2048 
2049 /* -------------------- 64-bit -------------------- */
2050 
2051 /* Compute a 64-bit value into a register pair, which is returned as
2052    the first two parameters.  As with iselIntExpr_R, these may be
2053    either real or virtual regs; in any case they must not be changed
2054    by subsequent code emitted by the caller.  */
2055 
iselInt64Expr(HReg * rHi,HReg * rLo,ISelEnv * env,const IRExpr * e)2056 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
2057                             const IRExpr* e )
2058 {
2059    iselInt64Expr_wrk(rHi, rLo, env, e);
2060 #  if 0
2061    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2062 #  endif
2063    vassert(hregClass(*rHi) == HRcInt32);
2064    vassert(hregIsVirtual(*rHi));
2065    vassert(hregClass(*rLo) == HRcInt32);
2066    vassert(hregIsVirtual(*rLo));
2067 }
2068 
2069 /* DO NOT CALL THIS DIRECTLY ! */
iselInt64Expr_wrk(HReg * rHi,HReg * rLo,ISelEnv * env,const IRExpr * e)2070 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
2071                                 const IRExpr* e )
2072 {
2073    vassert(e);
2074    vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
2075 
2076    /* 64-bit literal */
2077    if (e->tag == Iex_Const) {
2078       ULong   w64 = e->Iex.Const.con->Ico.U64;
2079       UInt    wHi = toUInt(w64 >> 32);
2080       UInt    wLo = toUInt(w64);
2081       HReg    tHi = newVRegI(env);
2082       HReg    tLo = newVRegI(env);
2083       vassert(e->Iex.Const.con->tag == Ico_U64);
2084       addInstr(env, ARMInstr_Imm32(tHi, wHi));
2085       addInstr(env, ARMInstr_Imm32(tLo, wLo));
2086       *rHi = tHi;
2087       *rLo = tLo;
2088       return;
2089    }
2090 
2091    /* read 64-bit IRTemp */
2092    if (e->tag == Iex_RdTmp) {
2093       if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2094          HReg tHi = newVRegI(env);
2095          HReg tLo = newVRegI(env);
2096          HReg tmp = iselNeon64Expr(env, e);
2097          addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2098          *rHi = tHi;
2099          *rLo = tLo;
2100       } else {
2101          lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
2102       }
2103       return;
2104    }
2105 
2106    /* 64-bit load */
2107    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2108       HReg      tLo, tHi, rA;
2109       vassert(e->Iex.Load.ty == Ity_I64);
2110       rA  = iselIntExpr_R(env, e->Iex.Load.addr);
2111       tHi = newVRegI(env);
2112       tLo = newVRegI(env);
2113       addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
2114                                     tHi, ARMAMode1_RI(rA, 4)));
2115       addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
2116                                     tLo, ARMAMode1_RI(rA, 0)));
2117       *rHi = tHi;
2118       *rLo = tLo;
2119       return;
2120    }
2121 
2122    /* 64-bit GET */
2123    if (e->tag == Iex_Get) {
2124       ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
2125       ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
2126       HReg tHi = newVRegI(env);
2127       HReg tLo = newVRegI(env);
2128       addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tHi, am4));
2129       addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tLo, am0));
2130       *rHi = tHi;
2131       *rLo = tLo;
2132       return;
2133    }
2134 
2135    /* --------- BINARY ops --------- */
2136    if (e->tag == Iex_Binop) {
2137       switch (e->Iex.Binop.op) {
2138 
2139          /* 32 x 32 -> 64 multiply */
2140          case Iop_MullS32:
2141          case Iop_MullU32: {
2142             HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2143             HReg     argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2144             HReg     tHi  = newVRegI(env);
2145             HReg     tLo  = newVRegI(env);
2146             ARMMulOp mop  = e->Iex.Binop.op == Iop_MullS32
2147                                ? ARMmul_SX : ARMmul_ZX;
2148             addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
2149             addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
2150             addInstr(env, ARMInstr_Mul(mop));
2151             addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
2152             addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
2153             *rHi = tHi;
2154             *rLo = tLo;
2155             return;
2156          }
2157 
2158          case Iop_Or64: {
2159             HReg xLo, xHi, yLo, yHi;
2160             HReg tHi = newVRegI(env);
2161             HReg tLo = newVRegI(env);
2162             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2163             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2164             addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
2165             addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
2166             *rHi = tHi;
2167             *rLo = tLo;
2168             return;
2169          }
2170 
2171          case Iop_Add64: {
2172             HReg xLo, xHi, yLo, yHi;
2173             HReg tHi = newVRegI(env);
2174             HReg tLo = newVRegI(env);
2175             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2176             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2177             addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
2178             addInstr(env, ARMInstr_Alu(ARMalu_ADC,  tHi, xHi, ARMRI84_R(yHi)));
2179             *rHi = tHi;
2180             *rLo = tLo;
2181             return;
2182          }
2183 
2184          /* 32HLto64(e1,e2) */
2185          case Iop_32HLto64: {
2186             *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2187             *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2188             return;
2189          }
2190 
2191          default:
2192             break;
2193       }
2194    }
2195 
2196    /* --------- UNARY ops --------- */
2197    if (e->tag == Iex_Unop) {
2198       switch (e->Iex.Unop.op) {
2199 
2200          /* ReinterpF64asI64 */
2201          case Iop_ReinterpF64asI64: {
2202             HReg dstHi = newVRegI(env);
2203             HReg dstLo = newVRegI(env);
2204             HReg src   = iselDblExpr(env, e->Iex.Unop.arg);
2205             addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
2206             *rHi = dstHi;
2207             *rLo = dstLo;
2208             return;
2209          }
2210 
2211          /* Left64(e) */
2212          case Iop_Left64: {
2213             HReg yLo, yHi;
2214             HReg tHi  = newVRegI(env);
2215             HReg tLo  = newVRegI(env);
2216             HReg zero = newVRegI(env);
2217             /* yHi:yLo = arg */
2218             iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2219             /* zero = 0 */
2220             addInstr(env, ARMInstr_Imm32(zero, 0));
2221             /* tLo = 0 - yLo, and set carry */
2222             addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
2223                                        tLo, zero, ARMRI84_R(yLo)));
2224             /* tHi = 0 - yHi - carry */
2225             addInstr(env, ARMInstr_Alu(ARMalu_SBC,
2226                                        tHi, zero, ARMRI84_R(yHi)));
2227             /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
2228                back in, so as to give the final result
2229                tHi:tLo = arg | -arg. */
2230             addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
2231             addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
2232             *rHi = tHi;
2233             *rLo = tLo;
2234             return;
2235          }
2236 
2237          /* CmpwNEZ64(e) */
2238          case Iop_CmpwNEZ64: {
2239             HReg srcLo, srcHi;
2240             HReg tmp1 = newVRegI(env);
2241             HReg tmp2 = newVRegI(env);
2242             /* srcHi:srcLo = arg */
2243             iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2244             /* tmp1 = srcHi | srcLo */
2245             addInstr(env, ARMInstr_Alu(ARMalu_OR,
2246                                        tmp1, srcHi, ARMRI84_R(srcLo)));
2247             /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2248             addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
2249             addInstr(env, ARMInstr_Alu(ARMalu_OR,
2250                                        tmp2, tmp2, ARMRI84_R(tmp1)));
2251             addInstr(env, ARMInstr_Shift(ARMsh_SAR,
2252                                          tmp2, tmp2, ARMRI5_I5(31)));
2253             *rHi = tmp2;
2254             *rLo = tmp2;
2255             return;
2256          }
2257 
2258          case Iop_1Sto64: {
2259             HReg        dst  = newVRegI(env);
2260             ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2261             ARMRI5*     amt  = ARMRI5_I5(31);
2262             /* This is really rough.  We could do much better here;
2263                perhaps mvn{cond} dst, #0 as the second insn?
2264                (same applies to 1Sto32) */
2265             addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2266             addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2267             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
2268             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
2269             *rHi = dst;
2270             *rLo = dst;
2271             return;
2272          }
2273 
2274          default:
2275             break;
2276       }
2277    } /* if (e->tag == Iex_Unop) */
2278 
2279    /* --------- MULTIPLEX --------- */
2280    if (e->tag == Iex_ITE) { // VFD
2281       IRType tyC;
2282       HReg   r1hi, r1lo, r0hi, r0lo, dstHi, dstLo;
2283       ARMCondCode cc;
2284       tyC = typeOfIRExpr(env->type_env,e->Iex.ITE.cond);
2285       vassert(tyC == Ity_I1);
2286       iselInt64Expr(&r1hi, &r1lo, env, e->Iex.ITE.iftrue);
2287       iselInt64Expr(&r0hi, &r0lo, env, e->Iex.ITE.iffalse);
2288       dstHi = newVRegI(env);
2289       dstLo = newVRegI(env);
2290       addInstr(env, mk_iMOVds_RR(dstHi, r1hi));
2291       addInstr(env, mk_iMOVds_RR(dstLo, r1lo));
2292       cc = iselCondCode(env, e->Iex.ITE.cond);
2293       addInstr(env, ARMInstr_CMov(cc ^ 1, dstHi, ARMRI84_R(r0hi)));
2294       addInstr(env, ARMInstr_CMov(cc ^ 1, dstLo, ARMRI84_R(r0lo)));
2295       *rHi = dstHi;
2296       *rLo = dstLo;
2297       return;
2298    }
2299 
2300    /* It is convenient sometimes to call iselInt64Expr even when we
2301       have NEON support (e.g. in do_helper_call we need 64-bit
2302       arguments as 2 x 32 regs). */
2303    if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2304       HReg tHi = newVRegI(env);
2305       HReg tLo = newVRegI(env);
2306       HReg tmp = iselNeon64Expr(env, e);
2307       addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2308       *rHi = tHi;
2309       *rLo = tLo;
2310       return ;
2311    }
2312 
2313    ppIRExpr(e);
2314    vpanic("iselInt64Expr");
2315 }
2316 
2317 
2318 /*---------------------------------------------------------*/
2319 /*--- ISEL: Vector (NEON) expressions (64 or 128 bit)   ---*/
2320 /*---------------------------------------------------------*/
2321 
iselNeon64Expr(ISelEnv * env,const IRExpr * e)2322 static HReg iselNeon64Expr ( ISelEnv* env, const IRExpr* e )
2323 {
2324    HReg r;
2325    vassert(env->hwcaps & VEX_HWCAPS_ARM_NEON);
2326    r = iselNeon64Expr_wrk( env, e );
2327    vassert(hregClass(r) == HRcFlt64);
2328    vassert(hregIsVirtual(r));
2329    return r;
2330 }
2331 
2332 /* DO NOT CALL THIS DIRECTLY */
iselNeon64Expr_wrk(ISelEnv * env,const IRExpr * e)2333 static HReg iselNeon64Expr_wrk ( ISelEnv* env, const IRExpr* e )
2334 {
2335    IRType ty = typeOfIRExpr(env->type_env, e);
2336    MatchInfo mi;
2337    vassert(e);
2338    vassert(ty == Ity_I64);
2339 
2340    if (e->tag == Iex_RdTmp) {
2341       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2342    }
2343 
2344    if (e->tag == Iex_Const) {
2345       HReg rLo, rHi;
2346       HReg res = newVRegD(env);
2347       iselInt64Expr(&rHi, &rLo, env, e);
2348       addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2349       return res;
2350    }
2351 
2352    /* 64-bit load */
2353    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2354       HReg res = newVRegD(env);
2355       ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2356       vassert(ty == Ity_I64);
2357       addInstr(env, ARMInstr_NLdStD(True, res, am));
2358       return res;
2359    }
2360 
2361    /* 64-bit GET */
2362    if (e->tag == Iex_Get) {
2363       HReg addr = newVRegI(env);
2364       HReg res = newVRegD(env);
2365       vassert(ty == Ity_I64);
2366       addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2367       addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2368       return res;
2369    }
2370 
2371    /* --------- BINARY ops --------- */
2372    if (e->tag == Iex_Binop) {
2373       switch (e->Iex.Binop.op) {
2374 
2375          /* 32 x 32 -> 64 multiply */
2376          case Iop_MullS32:
2377          case Iop_MullU32: {
2378             HReg rLo, rHi;
2379             HReg res = newVRegD(env);
2380             iselInt64Expr(&rHi, &rLo, env, e);
2381             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2382             return res;
2383          }
2384 
2385          case Iop_And64: {
2386             HReg res = newVRegD(env);
2387             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2388             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2389             addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2390                                            res, argL, argR, 4, False));
2391             return res;
2392          }
2393          case Iop_Or64: {
2394             HReg res = newVRegD(env);
2395             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2396             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2397             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2398                                            res, argL, argR, 4, False));
2399             return res;
2400          }
2401          case Iop_Xor64: {
2402             HReg res = newVRegD(env);
2403             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2404             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2405             addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2406                                            res, argL, argR, 4, False));
2407             return res;
2408          }
2409 
2410          /* 32HLto64(e1,e2) */
2411          case Iop_32HLto64: {
2412             HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2413             HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2414             HReg res = newVRegD(env);
2415             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2416             return res;
2417          }
2418 
2419          case Iop_Add8x8:
2420          case Iop_Add16x4:
2421          case Iop_Add32x2:
2422          case Iop_Add64: {
2423             HReg res = newVRegD(env);
2424             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2425             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2426             UInt size;
2427             switch (e->Iex.Binop.op) {
2428                case Iop_Add8x8: size = 0; break;
2429                case Iop_Add16x4: size = 1; break;
2430                case Iop_Add32x2: size = 2; break;
2431                case Iop_Add64: size = 3; break;
2432                default: vassert(0);
2433             }
2434             addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2435                                            res, argL, argR, size, False));
2436             return res;
2437          }
2438          case Iop_Add32Fx2: {
2439             HReg res = newVRegD(env);
2440             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2441             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2442             UInt size = 0;
2443             addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2444                                            res, argL, argR, size, False));
2445             return res;
2446          }
2447          case Iop_RecipStep32Fx2: {
2448             HReg res = newVRegD(env);
2449             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2450             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2451             UInt size = 0;
2452             addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2453                                            res, argL, argR, size, False));
2454             return res;
2455          }
2456          case Iop_RSqrtStep32Fx2: {
2457             HReg res = newVRegD(env);
2458             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2459             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2460             UInt size = 0;
2461             addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2462                                            res, argL, argR, size, False));
2463             return res;
2464          }
2465 
2466          // These 6 verified 18 Apr 2013
2467          case Iop_InterleaveHI32x2:
2468          case Iop_InterleaveLO32x2:
2469          case Iop_InterleaveOddLanes8x8:
2470          case Iop_InterleaveEvenLanes8x8:
2471          case Iop_InterleaveOddLanes16x4:
2472          case Iop_InterleaveEvenLanes16x4: {
2473             HReg rD   = newVRegD(env);
2474             HReg rM   = newVRegD(env);
2475             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2476             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2477             UInt size;
2478             Bool resRd;  // is the result in rD or rM ?
2479             switch (e->Iex.Binop.op) {
2480                case Iop_InterleaveOddLanes8x8:   resRd = False; size = 0; break;
2481                case Iop_InterleaveEvenLanes8x8:  resRd = True;  size = 0; break;
2482                case Iop_InterleaveOddLanes16x4:  resRd = False; size = 1; break;
2483                case Iop_InterleaveEvenLanes16x4: resRd = True;  size = 1; break;
2484                case Iop_InterleaveHI32x2:        resRd = False; size = 2; break;
2485                case Iop_InterleaveLO32x2:        resRd = True;  size = 2; break;
2486                default: vassert(0);
2487             }
2488             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2489             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2490             addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, False));
2491             return resRd ? rD : rM;
2492          }
2493 
2494          // These 4 verified 18 Apr 2013
2495          case Iop_InterleaveHI8x8:
2496          case Iop_InterleaveLO8x8:
2497          case Iop_InterleaveHI16x4:
2498          case Iop_InterleaveLO16x4: {
2499             HReg rD   = newVRegD(env);
2500             HReg rM   = newVRegD(env);
2501             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2502             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2503             UInt size;
2504             Bool resRd;  // is the result in rD or rM ?
2505             switch (e->Iex.Binop.op) {
2506                case Iop_InterleaveHI8x8:  resRd = False; size = 0; break;
2507                case Iop_InterleaveLO8x8:  resRd = True;  size = 0; break;
2508                case Iop_InterleaveHI16x4: resRd = False; size = 1; break;
2509                case Iop_InterleaveLO16x4: resRd = True;  size = 1; break;
2510                default: vassert(0);
2511             }
2512             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2513             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2514             addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, False));
2515             return resRd ? rD : rM;
2516          }
2517 
2518          // These 4 verified 18 Apr 2013
2519          case Iop_CatOddLanes8x8:
2520          case Iop_CatEvenLanes8x8:
2521          case Iop_CatOddLanes16x4:
2522          case Iop_CatEvenLanes16x4: {
2523             HReg rD   = newVRegD(env);
2524             HReg rM   = newVRegD(env);
2525             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2526             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2527             UInt size;
2528             Bool resRd;  // is the result in rD or rM ?
2529             switch (e->Iex.Binop.op) {
2530                case Iop_CatOddLanes8x8:   resRd = False; size = 0; break;
2531                case Iop_CatEvenLanes8x8:  resRd = True;  size = 0; break;
2532                case Iop_CatOddLanes16x4:  resRd = False; size = 1; break;
2533                case Iop_CatEvenLanes16x4: resRd = True;  size = 1; break;
2534                default: vassert(0);
2535             }
2536             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2537             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2538             addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, False));
2539             return resRd ? rD : rM;
2540          }
2541 
2542          case Iop_QAdd8Ux8:
2543          case Iop_QAdd16Ux4:
2544          case Iop_QAdd32Ux2:
2545          case Iop_QAdd64Ux1: {
2546             HReg res = newVRegD(env);
2547             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2548             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2549             UInt size;
2550             switch (e->Iex.Binop.op) {
2551                case Iop_QAdd8Ux8: size = 0; break;
2552                case Iop_QAdd16Ux4: size = 1; break;
2553                case Iop_QAdd32Ux2: size = 2; break;
2554                case Iop_QAdd64Ux1: size = 3; break;
2555                default: vassert(0);
2556             }
2557             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2558                                            res, argL, argR, size, False));
2559             return res;
2560          }
2561          case Iop_QAdd8Sx8:
2562          case Iop_QAdd16Sx4:
2563          case Iop_QAdd32Sx2:
2564          case Iop_QAdd64Sx1: {
2565             HReg res = newVRegD(env);
2566             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2567             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2568             UInt size;
2569             switch (e->Iex.Binop.op) {
2570                case Iop_QAdd8Sx8: size = 0; break;
2571                case Iop_QAdd16Sx4: size = 1; break;
2572                case Iop_QAdd32Sx2: size = 2; break;
2573                case Iop_QAdd64Sx1: size = 3; break;
2574                default: vassert(0);
2575             }
2576             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2577                                            res, argL, argR, size, False));
2578             return res;
2579          }
2580          case Iop_Sub8x8:
2581          case Iop_Sub16x4:
2582          case Iop_Sub32x2:
2583          case Iop_Sub64: {
2584             HReg res = newVRegD(env);
2585             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2586             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2587             UInt size;
2588             switch (e->Iex.Binop.op) {
2589                case Iop_Sub8x8: size = 0; break;
2590                case Iop_Sub16x4: size = 1; break;
2591                case Iop_Sub32x2: size = 2; break;
2592                case Iop_Sub64: size = 3; break;
2593                default: vassert(0);
2594             }
2595             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2596                                            res, argL, argR, size, False));
2597             return res;
2598          }
2599          case Iop_Sub32Fx2: {
2600             HReg res = newVRegD(env);
2601             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2602             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2603             UInt size = 0;
2604             addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2605                                            res, argL, argR, size, False));
2606             return res;
2607          }
2608          case Iop_QSub8Ux8:
2609          case Iop_QSub16Ux4:
2610          case Iop_QSub32Ux2:
2611          case Iop_QSub64Ux1: {
2612             HReg res = newVRegD(env);
2613             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2614             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2615             UInt size;
2616             switch (e->Iex.Binop.op) {
2617                case Iop_QSub8Ux8: size = 0; break;
2618                case Iop_QSub16Ux4: size = 1; break;
2619                case Iop_QSub32Ux2: size = 2; break;
2620                case Iop_QSub64Ux1: size = 3; break;
2621                default: vassert(0);
2622             }
2623             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2624                                            res, argL, argR, size, False));
2625             return res;
2626          }
2627          case Iop_QSub8Sx8:
2628          case Iop_QSub16Sx4:
2629          case Iop_QSub32Sx2:
2630          case Iop_QSub64Sx1: {
2631             HReg res = newVRegD(env);
2632             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2633             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2634             UInt size;
2635             switch (e->Iex.Binop.op) {
2636                case Iop_QSub8Sx8: size = 0; break;
2637                case Iop_QSub16Sx4: size = 1; break;
2638                case Iop_QSub32Sx2: size = 2; break;
2639                case Iop_QSub64Sx1: size = 3; break;
2640                default: vassert(0);
2641             }
2642             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2643                                            res, argL, argR, size, False));
2644             return res;
2645          }
2646          case Iop_Max8Ux8:
2647          case Iop_Max16Ux4:
2648          case Iop_Max32Ux2: {
2649             HReg res = newVRegD(env);
2650             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2651             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2652             UInt size;
2653             switch (e->Iex.Binop.op) {
2654                case Iop_Max8Ux8: size = 0; break;
2655                case Iop_Max16Ux4: size = 1; break;
2656                case Iop_Max32Ux2: size = 2; break;
2657                default: vassert(0);
2658             }
2659             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
2660                                            res, argL, argR, size, False));
2661             return res;
2662          }
2663          case Iop_Max8Sx8:
2664          case Iop_Max16Sx4:
2665          case Iop_Max32Sx2: {
2666             HReg res = newVRegD(env);
2667             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2668             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2669             UInt size;
2670             switch (e->Iex.Binop.op) {
2671                case Iop_Max8Sx8: size = 0; break;
2672                case Iop_Max16Sx4: size = 1; break;
2673                case Iop_Max32Sx2: size = 2; break;
2674                default: vassert(0);
2675             }
2676             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
2677                                            res, argL, argR, size, False));
2678             return res;
2679          }
2680          case Iop_Min8Ux8:
2681          case Iop_Min16Ux4:
2682          case Iop_Min32Ux2: {
2683             HReg res = newVRegD(env);
2684             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2685             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2686             UInt size;
2687             switch (e->Iex.Binop.op) {
2688                case Iop_Min8Ux8: size = 0; break;
2689                case Iop_Min16Ux4: size = 1; break;
2690                case Iop_Min32Ux2: size = 2; break;
2691                default: vassert(0);
2692             }
2693             addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
2694                                            res, argL, argR, size, False));
2695             return res;
2696          }
2697          case Iop_Min8Sx8:
2698          case Iop_Min16Sx4:
2699          case Iop_Min32Sx2: {
2700             HReg res = newVRegD(env);
2701             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2702             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2703             UInt size;
2704             switch (e->Iex.Binop.op) {
2705                case Iop_Min8Sx8: size = 0; break;
2706                case Iop_Min16Sx4: size = 1; break;
2707                case Iop_Min32Sx2: size = 2; break;
2708                default: vassert(0);
2709             }
2710             addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
2711                                            res, argL, argR, size, False));
2712             return res;
2713          }
2714          case Iop_Sar8x8:
2715          case Iop_Sar16x4:
2716          case Iop_Sar32x2: {
2717             HReg res = newVRegD(env);
2718             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2719             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2720             HReg argR2 = newVRegD(env);
2721             HReg zero = newVRegD(env);
2722             UInt size;
2723             switch (e->Iex.Binop.op) {
2724                case Iop_Sar8x8: size = 0; break;
2725                case Iop_Sar16x4: size = 1; break;
2726                case Iop_Sar32x2: size = 2; break;
2727                case Iop_Sar64: size = 3; break;
2728                default: vassert(0);
2729             }
2730             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2731             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2732                                            argR2, zero, argR, size, False));
2733             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2734                                           res, argL, argR2, size, False));
2735             return res;
2736          }
2737          case Iop_Sal8x8:
2738          case Iop_Sal16x4:
2739          case Iop_Sal32x2:
2740          case Iop_Sal64x1: {
2741             HReg res = newVRegD(env);
2742             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2743             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2744             UInt size;
2745             switch (e->Iex.Binop.op) {
2746                case Iop_Sal8x8: size = 0; break;
2747                case Iop_Sal16x4: size = 1; break;
2748                case Iop_Sal32x2: size = 2; break;
2749                case Iop_Sal64x1: size = 3; break;
2750                default: vassert(0);
2751             }
2752             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2753                                           res, argL, argR, size, False));
2754             return res;
2755          }
2756          case Iop_Shr8x8:
2757          case Iop_Shr16x4:
2758          case Iop_Shr32x2: {
2759             HReg res = newVRegD(env);
2760             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2761             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2762             HReg argR2 = newVRegD(env);
2763             HReg zero = newVRegD(env);
2764             UInt size;
2765             switch (e->Iex.Binop.op) {
2766                case Iop_Shr8x8: size = 0; break;
2767                case Iop_Shr16x4: size = 1; break;
2768                case Iop_Shr32x2: size = 2; break;
2769                default: vassert(0);
2770             }
2771             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2772             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2773                                            argR2, zero, argR, size, False));
2774             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2775                                           res, argL, argR2, size, False));
2776             return res;
2777          }
2778          case Iop_Shl8x8:
2779          case Iop_Shl16x4:
2780          case Iop_Shl32x2: {
2781             HReg res = newVRegD(env);
2782             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2783             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2784             UInt size;
2785             switch (e->Iex.Binop.op) {
2786                case Iop_Shl8x8: size = 0; break;
2787                case Iop_Shl16x4: size = 1; break;
2788                case Iop_Shl32x2: size = 2; break;
2789                default: vassert(0);
2790             }
2791             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2792                                           res, argL, argR, size, False));
2793             return res;
2794          }
2795          case Iop_QShl8x8:
2796          case Iop_QShl16x4:
2797          case Iop_QShl32x2:
2798          case Iop_QShl64x1: {
2799             HReg res = newVRegD(env);
2800             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2801             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2802             UInt size;
2803             switch (e->Iex.Binop.op) {
2804                case Iop_QShl8x8: size = 0; break;
2805                case Iop_QShl16x4: size = 1; break;
2806                case Iop_QShl32x2: size = 2; break;
2807                case Iop_QShl64x1: size = 3; break;
2808                default: vassert(0);
2809             }
2810             addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
2811                                           res, argL, argR, size, False));
2812             return res;
2813          }
2814          case Iop_QSal8x8:
2815          case Iop_QSal16x4:
2816          case Iop_QSal32x2:
2817          case Iop_QSal64x1: {
2818             HReg res = newVRegD(env);
2819             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2820             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2821             UInt size;
2822             switch (e->Iex.Binop.op) {
2823                case Iop_QSal8x8: size = 0; break;
2824                case Iop_QSal16x4: size = 1; break;
2825                case Iop_QSal32x2: size = 2; break;
2826                case Iop_QSal64x1: size = 3; break;
2827                default: vassert(0);
2828             }
2829             addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
2830                                           res, argL, argR, size, False));
2831             return res;
2832          }
2833          case Iop_QShlNsatUU8x8:
2834          case Iop_QShlNsatUU16x4:
2835          case Iop_QShlNsatUU32x2:
2836          case Iop_QShlNsatUU64x1: {
2837             HReg res = newVRegD(env);
2838             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2839             UInt size, imm;
2840             if (e->Iex.Binop.arg2->tag != Iex_Const ||
2841                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2842                vpanic("ARM target supports Iop_QShlNsatUUAxB with constant "
2843                       "second argument only\n");
2844             }
2845             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2846             switch (e->Iex.Binop.op) {
2847                case Iop_QShlNsatUU8x8: size = 8 | imm; break;
2848                case Iop_QShlNsatUU16x4: size = 16 | imm; break;
2849                case Iop_QShlNsatUU32x2: size = 32 | imm; break;
2850                case Iop_QShlNsatUU64x1: size = 64 | imm; break;
2851                default: vassert(0);
2852             }
2853             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
2854                                           res, argL, size, False));
2855             return res;
2856          }
2857          case Iop_QShlNsatSU8x8:
2858          case Iop_QShlNsatSU16x4:
2859          case Iop_QShlNsatSU32x2:
2860          case Iop_QShlNsatSU64x1: {
2861             HReg res = newVRegD(env);
2862             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2863             UInt size, imm;
2864             if (e->Iex.Binop.arg2->tag != Iex_Const ||
2865                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2866                vpanic("ARM target supports Iop_QShlNsatSUAxB with constant "
2867                       "second argument only\n");
2868             }
2869             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2870             switch (e->Iex.Binop.op) {
2871                case Iop_QShlNsatSU8x8: size = 8 | imm; break;
2872                case Iop_QShlNsatSU16x4: size = 16 | imm; break;
2873                case Iop_QShlNsatSU32x2: size = 32 | imm; break;
2874                case Iop_QShlNsatSU64x1: size = 64 | imm; break;
2875                default: vassert(0);
2876             }
2877             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
2878                                           res, argL, size, False));
2879             return res;
2880          }
2881          case Iop_QShlNsatSS8x8:
2882          case Iop_QShlNsatSS16x4:
2883          case Iop_QShlNsatSS32x2:
2884          case Iop_QShlNsatSS64x1: {
2885             HReg res = newVRegD(env);
2886             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2887             UInt size, imm;
2888             if (e->Iex.Binop.arg2->tag != Iex_Const ||
2889                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2890                vpanic("ARM target supports Iop_QShlNsatSSAxB with constant "
2891                       "second argument only\n");
2892             }
2893             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2894             switch (e->Iex.Binop.op) {
2895                case Iop_QShlNsatSS8x8: size = 8 | imm; break;
2896                case Iop_QShlNsatSS16x4: size = 16 | imm; break;
2897                case Iop_QShlNsatSS32x2: size = 32 | imm; break;
2898                case Iop_QShlNsatSS64x1: size = 64 | imm; break;
2899                default: vassert(0);
2900             }
2901             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
2902                                           res, argL, size, False));
2903             return res;
2904          }
2905          case Iop_ShrN8x8:
2906          case Iop_ShrN16x4:
2907          case Iop_ShrN32x2:
2908          case Iop_Shr64: {
2909             HReg res = newVRegD(env);
2910             HReg tmp = newVRegD(env);
2911             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2912             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2913             HReg argR2 = newVRegI(env);
2914             UInt size;
2915             switch (e->Iex.Binop.op) {
2916                case Iop_ShrN8x8: size = 0; break;
2917                case Iop_ShrN16x4: size = 1; break;
2918                case Iop_ShrN32x2: size = 2; break;
2919                case Iop_Shr64: size = 3; break;
2920                default: vassert(0);
2921             }
2922             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2923             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2924             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2925                                           res, argL, tmp, size, False));
2926             return res;
2927          }
2928          case Iop_ShlN8x8:
2929          case Iop_ShlN16x4:
2930          case Iop_ShlN32x2:
2931          case Iop_Shl64: {
2932             HReg res = newVRegD(env);
2933             HReg tmp = newVRegD(env);
2934             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2935             /* special-case Shl64(x, imm8) since the Neon front
2936                end produces a lot of those for V{LD,ST}{1,2,3,4}. */
2937             if (e->Iex.Binop.op == Iop_Shl64
2938                 && e->Iex.Binop.arg2->tag == Iex_Const) {
2939                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
2940                Int nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2941                if (nshift >= 1 && nshift <= 63) {
2942                   addInstr(env, ARMInstr_NShl64(res, argL, nshift));
2943                   return res;
2944                }
2945                /* else fall through to general case */
2946             }
2947             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2948             UInt size;
2949             switch (e->Iex.Binop.op) {
2950                case Iop_ShlN8x8:  size = 0; break;
2951                case Iop_ShlN16x4: size = 1; break;
2952                case Iop_ShlN32x2: size = 2; break;
2953                case Iop_Shl64:    size = 3; break;
2954                default: vassert(0);
2955             }
2956             addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
2957                                           tmp, argR, 0, False));
2958             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2959                                           res, argL, tmp, size, False));
2960             return res;
2961          }
2962          case Iop_SarN8x8:
2963          case Iop_SarN16x4:
2964          case Iop_SarN32x2:
2965          case Iop_Sar64: {
2966             HReg res = newVRegD(env);
2967             HReg tmp = newVRegD(env);
2968             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2969             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2970             HReg argR2 = newVRegI(env);
2971             UInt size;
2972             switch (e->Iex.Binop.op) {
2973                case Iop_SarN8x8: size = 0; break;
2974                case Iop_SarN16x4: size = 1; break;
2975                case Iop_SarN32x2: size = 2; break;
2976                case Iop_Sar64: size = 3; break;
2977                default: vassert(0);
2978             }
2979             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2980             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2981             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2982                                           res, argL, tmp, size, False));
2983             return res;
2984          }
2985          case Iop_CmpGT8Ux8:
2986          case Iop_CmpGT16Ux4:
2987          case Iop_CmpGT32Ux2: {
2988             HReg res = newVRegD(env);
2989             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2990             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2991             UInt size;
2992             switch (e->Iex.Binop.op) {
2993                case Iop_CmpGT8Ux8: size = 0; break;
2994                case Iop_CmpGT16Ux4: size = 1; break;
2995                case Iop_CmpGT32Ux2: size = 2; break;
2996                default: vassert(0);
2997             }
2998             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
2999                                            res, argL, argR, size, False));
3000             return res;
3001          }
3002          case Iop_CmpGT8Sx8:
3003          case Iop_CmpGT16Sx4:
3004          case Iop_CmpGT32Sx2: {
3005             HReg res = newVRegD(env);
3006             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3007             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3008             UInt size;
3009             switch (e->Iex.Binop.op) {
3010                case Iop_CmpGT8Sx8: size = 0; break;
3011                case Iop_CmpGT16Sx4: size = 1; break;
3012                case Iop_CmpGT32Sx2: size = 2; break;
3013                default: vassert(0);
3014             }
3015             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
3016                                            res, argL, argR, size, False));
3017             return res;
3018          }
3019          case Iop_CmpEQ8x8:
3020          case Iop_CmpEQ16x4:
3021          case Iop_CmpEQ32x2: {
3022             HReg res = newVRegD(env);
3023             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3024             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3025             UInt size;
3026             switch (e->Iex.Binop.op) {
3027                case Iop_CmpEQ8x8: size = 0; break;
3028                case Iop_CmpEQ16x4: size = 1; break;
3029                case Iop_CmpEQ32x2: size = 2; break;
3030                default: vassert(0);
3031             }
3032             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
3033                                            res, argL, argR, size, False));
3034             return res;
3035          }
3036          case Iop_Mul8x8:
3037          case Iop_Mul16x4:
3038          case Iop_Mul32x2: {
3039             HReg res = newVRegD(env);
3040             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3041             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3042             UInt size = 0;
3043             switch(e->Iex.Binop.op) {
3044                case Iop_Mul8x8: size = 0; break;
3045                case Iop_Mul16x4: size = 1; break;
3046                case Iop_Mul32x2: size = 2; break;
3047                default: vassert(0);
3048             }
3049             addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
3050                                            res, argL, argR, size, False));
3051             return res;
3052          }
3053          case Iop_Mul32Fx2: {
3054             HReg res = newVRegD(env);
3055             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3056             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3057             UInt size = 0;
3058             addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
3059                                            res, argL, argR, size, False));
3060             return res;
3061          }
3062          case Iop_QDMulHi16Sx4:
3063          case Iop_QDMulHi32Sx2: {
3064             HReg res = newVRegD(env);
3065             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3066             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3067             UInt size = 0;
3068             switch(e->Iex.Binop.op) {
3069                case Iop_QDMulHi16Sx4: size = 1; break;
3070                case Iop_QDMulHi32Sx2: size = 2; break;
3071                default: vassert(0);
3072             }
3073             addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
3074                                            res, argL, argR, size, False));
3075             return res;
3076          }
3077 
3078          case Iop_QRDMulHi16Sx4:
3079          case Iop_QRDMulHi32Sx2: {
3080             HReg res = newVRegD(env);
3081             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3082             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3083             UInt size = 0;
3084             switch(e->Iex.Binop.op) {
3085                case Iop_QRDMulHi16Sx4: size = 1; break;
3086                case Iop_QRDMulHi32Sx2: size = 2; break;
3087                default: vassert(0);
3088             }
3089             addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
3090                                            res, argL, argR, size, False));
3091             return res;
3092          }
3093 
3094          case Iop_PwAdd8x8:
3095          case Iop_PwAdd16x4:
3096          case Iop_PwAdd32x2: {
3097             HReg res = newVRegD(env);
3098             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3099             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3100             UInt size = 0;
3101             switch(e->Iex.Binop.op) {
3102                case Iop_PwAdd8x8: size = 0; break;
3103                case Iop_PwAdd16x4: size = 1; break;
3104                case Iop_PwAdd32x2: size = 2; break;
3105                default: vassert(0);
3106             }
3107             addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
3108                                            res, argL, argR, size, False));
3109             return res;
3110          }
3111          case Iop_PwAdd32Fx2: {
3112             HReg res = newVRegD(env);
3113             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3114             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3115             UInt size = 0;
3116             addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
3117                                            res, argL, argR, size, False));
3118             return res;
3119          }
3120          case Iop_PwMin8Ux8:
3121          case Iop_PwMin16Ux4:
3122          case Iop_PwMin32Ux2: {
3123             HReg res = newVRegD(env);
3124             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3125             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3126             UInt size = 0;
3127             switch(e->Iex.Binop.op) {
3128                case Iop_PwMin8Ux8: size = 0; break;
3129                case Iop_PwMin16Ux4: size = 1; break;
3130                case Iop_PwMin32Ux2: size = 2; break;
3131                default: vassert(0);
3132             }
3133             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
3134                                            res, argL, argR, size, False));
3135             return res;
3136          }
3137          case Iop_PwMin8Sx8:
3138          case Iop_PwMin16Sx4:
3139          case Iop_PwMin32Sx2: {
3140             HReg res = newVRegD(env);
3141             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3142             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3143             UInt size = 0;
3144             switch(e->Iex.Binop.op) {
3145                case Iop_PwMin8Sx8: size = 0; break;
3146                case Iop_PwMin16Sx4: size = 1; break;
3147                case Iop_PwMin32Sx2: size = 2; break;
3148                default: vassert(0);
3149             }
3150             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
3151                                            res, argL, argR, size, False));
3152             return res;
3153          }
3154          case Iop_PwMax8Ux8:
3155          case Iop_PwMax16Ux4:
3156          case Iop_PwMax32Ux2: {
3157             HReg res = newVRegD(env);
3158             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3159             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3160             UInt size = 0;
3161             switch(e->Iex.Binop.op) {
3162                case Iop_PwMax8Ux8: size = 0; break;
3163                case Iop_PwMax16Ux4: size = 1; break;
3164                case Iop_PwMax32Ux2: size = 2; break;
3165                default: vassert(0);
3166             }
3167             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
3168                                            res, argL, argR, size, False));
3169             return res;
3170          }
3171          case Iop_PwMax8Sx8:
3172          case Iop_PwMax16Sx4:
3173          case Iop_PwMax32Sx2: {
3174             HReg res = newVRegD(env);
3175             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3176             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3177             UInt size = 0;
3178             switch(e->Iex.Binop.op) {
3179                case Iop_PwMax8Sx8: size = 0; break;
3180                case Iop_PwMax16Sx4: size = 1; break;
3181                case Iop_PwMax32Sx2: size = 2; break;
3182                default: vassert(0);
3183             }
3184             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
3185                                            res, argL, argR, size, False));
3186             return res;
3187          }
3188          case Iop_Perm8x8: {
3189             HReg res = newVRegD(env);
3190             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3191             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3192             addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
3193                                            res, argL, argR, 0, False));
3194             return res;
3195          }
3196          case Iop_PolynomialMul8x8: {
3197             HReg res = newVRegD(env);
3198             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3199             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3200             UInt size = 0;
3201             addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
3202                                            res, argL, argR, size, False));
3203             return res;
3204          }
3205          case Iop_Max32Fx2: {
3206             HReg res = newVRegD(env);
3207             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3208             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3209             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
3210                                            res, argL, argR, 2, False));
3211             return res;
3212          }
3213          case Iop_Min32Fx2: {
3214             HReg res = newVRegD(env);
3215             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3216             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3217             addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
3218                                            res, argL, argR, 2, False));
3219             return res;
3220          }
3221          case Iop_PwMax32Fx2: {
3222             HReg res = newVRegD(env);
3223             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3224             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3225             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
3226                                            res, argL, argR, 2, False));
3227             return res;
3228          }
3229          case Iop_PwMin32Fx2: {
3230             HReg res = newVRegD(env);
3231             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3232             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3233             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
3234                                            res, argL, argR, 2, False));
3235             return res;
3236          }
3237          case Iop_CmpGT32Fx2: {
3238             HReg res = newVRegD(env);
3239             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3240             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3241             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
3242                                            res, argL, argR, 2, False));
3243             return res;
3244          }
3245          case Iop_CmpGE32Fx2: {
3246             HReg res = newVRegD(env);
3247             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3248             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3249             addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
3250                                            res, argL, argR, 2, False));
3251             return res;
3252          }
3253          case Iop_CmpEQ32Fx2: {
3254             HReg res = newVRegD(env);
3255             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3256             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3257             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3258                                            res, argL, argR, 2, False));
3259             return res;
3260          }
3261          case Iop_F32ToFixed32Ux2_RZ:
3262          case Iop_F32ToFixed32Sx2_RZ:
3263          case Iop_Fixed32UToF32x2_RN:
3264          case Iop_Fixed32SToF32x2_RN: {
3265             HReg res = newVRegD(env);
3266             HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3267             ARMNeonUnOp op;
3268             UInt imm6;
3269             if (e->Iex.Binop.arg2->tag != Iex_Const ||
3270                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3271                   vpanic("ARM supports FP <-> Fixed conversion with constant "
3272                          "second argument less than 33 only\n");
3273             }
3274             imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3275             vassert(imm6 <= 32 && imm6 > 0);
3276             imm6 = 64 - imm6;
3277             switch(e->Iex.Binop.op) {
3278                case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3279                case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3280                case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3281                case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3282                default: vassert(0);
3283             }
3284             addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3285             return res;
3286          }
3287          /*
3288          FIXME: is this here or not?
3289          case Iop_VDup8x8:
3290          case Iop_VDup16x4:
3291          case Iop_VDup32x2: {
3292             HReg res = newVRegD(env);
3293             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3294             UInt index;
3295             UInt imm4;
3296             UInt size = 0;
3297             if (e->Iex.Binop.arg2->tag != Iex_Const ||
3298                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3299                   vpanic("ARM supports Iop_VDup with constant "
3300                          "second argument less than 16 only\n");
3301             }
3302             index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3303             switch(e->Iex.Binop.op) {
3304                case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3305                case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3306                case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3307                default: vassert(0);
3308             }
3309             if (imm4 >= 16) {
3310                vpanic("ARM supports Iop_VDup with constant "
3311                       "second argument less than 16 only\n");
3312             }
3313             addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3314                                           res, argL, imm4, False));
3315             return res;
3316          }
3317          */
3318          default:
3319             break;
3320       }
3321    }
3322 
3323    /* --------- UNARY ops --------- */
3324    if (e->tag == Iex_Unop) {
3325       switch (e->Iex.Unop.op) {
3326 
3327          /* 32Uto64 */
3328          case Iop_32Uto64: {
3329             HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3330             HReg rHi = newVRegI(env);
3331             HReg res = newVRegD(env);
3332             addInstr(env, ARMInstr_Imm32(rHi, 0));
3333             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3334             return res;
3335          }
3336 
3337          /* 32Sto64 */
3338          case Iop_32Sto64: {
3339             HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3340             HReg rHi = newVRegI(env);
3341             addInstr(env, mk_iMOVds_RR(rHi, rLo));
3342             addInstr(env, ARMInstr_Shift(ARMsh_SAR, rHi, rHi, ARMRI5_I5(31)));
3343             HReg res = newVRegD(env);
3344             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3345             return res;
3346          }
3347 
3348          /* The next 3 are pass-throughs */
3349          /* ReinterpF64asI64 */
3350          case Iop_ReinterpF64asI64:
3351          /* Left64(e) */
3352          case Iop_Left64:
         /* 1Sto64(e) */
3354          case Iop_1Sto64: {
3355             HReg rLo, rHi;
3356             HReg res = newVRegD(env);
3357             iselInt64Expr(&rHi, &rLo, env, e);
3358             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3359             return res;
3360          }
3361 
3362          case Iop_Not64: {
3363             DECLARE_PATTERN(p_veqz_8x8);
3364             DECLARE_PATTERN(p_veqz_16x4);
3365             DECLARE_PATTERN(p_veqz_32x2);
3366             DECLARE_PATTERN(p_vcge_8sx8);
3367             DECLARE_PATTERN(p_vcge_16sx4);
3368             DECLARE_PATTERN(p_vcge_32sx2);
3369             DECLARE_PATTERN(p_vcge_8ux8);
3370             DECLARE_PATTERN(p_vcge_16ux4);
3371             DECLARE_PATTERN(p_vcge_32ux2);
3372             DEFINE_PATTERN(p_veqz_8x8,
3373                   unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3374             DEFINE_PATTERN(p_veqz_16x4,
3375                   unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3376             DEFINE_PATTERN(p_veqz_32x2,
3377                   unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3378             DEFINE_PATTERN(p_vcge_8sx8,
3379                   unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3380             DEFINE_PATTERN(p_vcge_16sx4,
3381                   unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3382             DEFINE_PATTERN(p_vcge_32sx2,
3383                   unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3384             DEFINE_PATTERN(p_vcge_8ux8,
3385                   unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3386             DEFINE_PATTERN(p_vcge_16ux4,
3387                   unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3388             DEFINE_PATTERN(p_vcge_32ux2,
3389                   unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3390             if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3391                HReg res = newVRegD(env);
3392                HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3393                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3394                return res;
3395             } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3396                HReg res = newVRegD(env);
3397                HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3398                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3399                return res;
3400             } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3401                HReg res = newVRegD(env);
3402                HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3403                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3404                return res;
3405             } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3406                HReg res = newVRegD(env);
3407                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3408                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3409                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3410                                               res, argL, argR, 0, False));
3411                return res;
3412             } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3413                HReg res = newVRegD(env);
3414                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3415                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3416                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3417                                               res, argL, argR, 1, False));
3418                return res;
3419             } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3420                HReg res = newVRegD(env);
3421                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3422                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3423                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3424                                               res, argL, argR, 2, False));
3425                return res;
3426             } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3427                HReg res = newVRegD(env);
3428                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3429                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3430                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3431                                               res, argL, argR, 0, False));
3432                return res;
3433             } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3434                HReg res = newVRegD(env);
3435                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3436                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3437                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3438                                               res, argL, argR, 1, False));
3439                return res;
3440             } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3441                HReg res = newVRegD(env);
3442                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3443                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3444                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3445                                               res, argL, argR, 2, False));
3446                return res;
3447             } else {
3448                HReg res = newVRegD(env);
3449                HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3450                addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3451                return res;
3452             }
3453          }
3454          case Iop_Dup8x8:
3455          case Iop_Dup16x4:
3456          case Iop_Dup32x2: {
3457             HReg res, arg;
3458             UInt size;
3459             DECLARE_PATTERN(p_vdup_8x8);
3460             DECLARE_PATTERN(p_vdup_16x4);
3461             DECLARE_PATTERN(p_vdup_32x2);
3462             DEFINE_PATTERN(p_vdup_8x8,
3463                   unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3464             DEFINE_PATTERN(p_vdup_16x4,
3465                   unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3466             DEFINE_PATTERN(p_vdup_32x2,
3467                   unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3468             if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3469                UInt index;
3470                UInt imm4;
3471                if (mi.bindee[1]->tag == Iex_Const &&
3472                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3473                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3474                   imm4 = (index << 1) + 1;
3475                   if (index < 8) {
3476                      res = newVRegD(env);
3477                      arg = iselNeon64Expr(env, mi.bindee[0]);
3478                      addInstr(env, ARMInstr_NUnaryS(
3479                                       ARMneon_VDUP,
3480                                       mkARMNRS(ARMNRS_Reg, res, 0),
3481                                       mkARMNRS(ARMNRS_Scalar, arg, index),
3482                                       imm4, False
3483                              ));
3484                      return res;
3485                   }
3486                }
3487             } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3488                UInt index;
3489                UInt imm4;
3490                if (mi.bindee[1]->tag == Iex_Const &&
3491                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3492                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3493                   imm4 = (index << 2) + 2;
3494                   if (index < 4) {
3495                      res = newVRegD(env);
3496                      arg = iselNeon64Expr(env, mi.bindee[0]);
3497                      addInstr(env, ARMInstr_NUnaryS(
3498                                       ARMneon_VDUP,
3499                                       mkARMNRS(ARMNRS_Reg, res, 0),
3500                                       mkARMNRS(ARMNRS_Scalar, arg, index),
3501                                       imm4, False
3502                              ));
3503                      return res;
3504                   }
3505                }
3506             } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3507                UInt index;
3508                UInt imm4;
3509                if (mi.bindee[1]->tag == Iex_Const &&
3510                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3511                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3512                   imm4 = (index << 3) + 4;
3513                   if (index < 2) {
3514                      res = newVRegD(env);
3515                      arg = iselNeon64Expr(env, mi.bindee[0]);
3516                      addInstr(env, ARMInstr_NUnaryS(
3517                                       ARMneon_VDUP,
3518                                       mkARMNRS(ARMNRS_Reg, res, 0),
3519                                       mkARMNRS(ARMNRS_Scalar, arg, index),
3520                                       imm4, False
3521                              ));
3522                      return res;
3523                   }
3524                }
3525             }
3526             arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3527             res = newVRegD(env);
3528             switch (e->Iex.Unop.op) {
3529                case Iop_Dup8x8: size = 0; break;
3530                case Iop_Dup16x4: size = 1; break;
3531                case Iop_Dup32x2: size = 2; break;
3532                default: vassert(0);
3533             }
3534             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3535             return res;
3536          }
3537          case Iop_Abs8x8:
3538          case Iop_Abs16x4:
3539          case Iop_Abs32x2: {
3540             HReg res = newVRegD(env);
3541             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3542             UInt size = 0;
3543             switch(e->Iex.Binop.op) {
3544                case Iop_Abs8x8: size = 0; break;
3545                case Iop_Abs16x4: size = 1; break;
3546                case Iop_Abs32x2: size = 2; break;
3547                default: vassert(0);
3548             }
3549             addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3550             return res;
3551          }
3552          case Iop_Reverse8sIn64_x1:
3553          case Iop_Reverse16sIn64_x1:
3554          case Iop_Reverse32sIn64_x1: {
3555             HReg res = newVRegD(env);
3556             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3557             UInt size = 0;
3558             switch(e->Iex.Binop.op) {
3559                case Iop_Reverse8sIn64_x1: size = 0; break;
3560                case Iop_Reverse16sIn64_x1: size = 1; break;
3561                case Iop_Reverse32sIn64_x1: size = 2; break;
3562                default: vassert(0);
3563             }
3564             addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3565                                           res, arg, size, False));
3566             return res;
3567          }
3568          case Iop_Reverse8sIn32_x2:
3569          case Iop_Reverse16sIn32_x2: {
3570             HReg res = newVRegD(env);
3571             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3572             UInt size = 0;
3573             switch(e->Iex.Binop.op) {
3574                case Iop_Reverse8sIn32_x2: size = 0; break;
3575                case Iop_Reverse16sIn32_x2: size = 1; break;
3576                default: vassert(0);
3577             }
3578             addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3579                                           res, arg, size, False));
3580             return res;
3581          }
3582          case Iop_Reverse8sIn16_x4: {
3583             HReg res = newVRegD(env);
3584             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3585             UInt size = 0;
3586             addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3587                                           res, arg, size, False));
3588             return res;
3589          }
3590          case Iop_CmpwNEZ64: {
3591             HReg x_lsh = newVRegD(env);
3592             HReg x_rsh = newVRegD(env);
3593             HReg lsh_amt = newVRegD(env);
3594             HReg rsh_amt = newVRegD(env);
3595             HReg zero = newVRegD(env);
3596             HReg tmp = newVRegD(env);
3597             HReg tmp2 = newVRegD(env);
3598             HReg res = newVRegD(env);
3599             HReg x = newVRegD(env);
3600             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3601             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3602             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3603             addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3604             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3605             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3606                                            rsh_amt, zero, lsh_amt, 2, False));
3607             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3608                                           x_lsh, x, lsh_amt, 3, False));
3609             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3610                                           x_rsh, x, rsh_amt, 3, False));
3611             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3612                                            tmp, x_lsh, x_rsh, 0, False));
3613             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3614                                            res, tmp, x, 0, False));
3615             return res;
3616          }
3617          case Iop_CmpNEZ8x8:
3618          case Iop_CmpNEZ16x4:
3619          case Iop_CmpNEZ32x2: {
3620             HReg res = newVRegD(env);
3621             HReg tmp = newVRegD(env);
3622             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3623             UInt size;
3624             switch (e->Iex.Unop.op) {
3625                case Iop_CmpNEZ8x8: size = 0; break;
3626                case Iop_CmpNEZ16x4: size = 1; break;
3627                case Iop_CmpNEZ32x2: size = 2; break;
3628                default: vassert(0);
3629             }
3630             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3631             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3632             return res;
3633          }
3634          case Iop_NarrowUn16to8x8:
3635          case Iop_NarrowUn32to16x4:
3636          case Iop_NarrowUn64to32x2: {
3637             HReg res = newVRegD(env);
3638             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3639             UInt size = 0;
3640             switch(e->Iex.Binop.op) {
3641                case Iop_NarrowUn16to8x8:  size = 0; break;
3642                case Iop_NarrowUn32to16x4: size = 1; break;
3643                case Iop_NarrowUn64to32x2: size = 2; break;
3644                default: vassert(0);
3645             }
3646             addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3647                                           res, arg, size, False));
3648             return res;
3649          }
3650          case Iop_QNarrowUn16Sto8Sx8:
3651          case Iop_QNarrowUn32Sto16Sx4:
3652          case Iop_QNarrowUn64Sto32Sx2: {
3653             HReg res = newVRegD(env);
3654             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3655             UInt size = 0;
3656             switch(e->Iex.Binop.op) {
3657                case Iop_QNarrowUn16Sto8Sx8:  size = 0; break;
3658                case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
3659                case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
3660                default: vassert(0);
3661             }
3662             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
3663                                           res, arg, size, False));
3664             return res;
3665          }
3666          case Iop_QNarrowUn16Sto8Ux8:
3667          case Iop_QNarrowUn32Sto16Ux4:
3668          case Iop_QNarrowUn64Sto32Ux2: {
3669             HReg res = newVRegD(env);
3670             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3671             UInt size = 0;
3672             switch(e->Iex.Binop.op) {
3673                case Iop_QNarrowUn16Sto8Ux8:  size = 0; break;
3674                case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
3675                case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
3676                default: vassert(0);
3677             }
3678             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
3679                                           res, arg, size, False));
3680             return res;
3681          }
3682          case Iop_QNarrowUn16Uto8Ux8:
3683          case Iop_QNarrowUn32Uto16Ux4:
3684          case Iop_QNarrowUn64Uto32Ux2: {
3685             HReg res = newVRegD(env);
3686             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3687             UInt size = 0;
3688             switch(e->Iex.Binop.op) {
3689                case Iop_QNarrowUn16Uto8Ux8:  size = 0; break;
3690                case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
3691                case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
3692                default: vassert(0);
3693             }
3694             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
3695                                           res, arg, size, False));
3696             return res;
3697          }
3698          case Iop_PwAddL8Sx8:
3699          case Iop_PwAddL16Sx4:
3700          case Iop_PwAddL32Sx2: {
3701             HReg res = newVRegD(env);
3702             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3703             UInt size = 0;
3704             switch(e->Iex.Binop.op) {
3705                case Iop_PwAddL8Sx8: size = 0; break;
3706                case Iop_PwAddL16Sx4: size = 1; break;
3707                case Iop_PwAddL32Sx2: size = 2; break;
3708                default: vassert(0);
3709             }
3710             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
3711                                           res, arg, size, False));
3712             return res;
3713          }
3714          case Iop_PwAddL8Ux8:
3715          case Iop_PwAddL16Ux4:
3716          case Iop_PwAddL32Ux2: {
3717             HReg res = newVRegD(env);
3718             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3719             UInt size = 0;
3720             switch(e->Iex.Binop.op) {
3721                case Iop_PwAddL8Ux8: size = 0; break;
3722                case Iop_PwAddL16Ux4: size = 1; break;
3723                case Iop_PwAddL32Ux2: size = 2; break;
3724                default: vassert(0);
3725             }
3726             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
3727                                           res, arg, size, False));
3728             return res;
3729          }
3730          case Iop_Cnt8x8: {
3731             HReg res = newVRegD(env);
3732             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3733             UInt size = 0;
3734             addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
3735                                           res, arg, size, False));
3736             return res;
3737          }
3738          case Iop_Clz8x8:
3739          case Iop_Clz16x4:
3740          case Iop_Clz32x2: {
3741             HReg res = newVRegD(env);
3742             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3743             UInt size = 0;
3744             switch(e->Iex.Binop.op) {
3745                case Iop_Clz8x8: size = 0; break;
3746                case Iop_Clz16x4: size = 1; break;
3747                case Iop_Clz32x2: size = 2; break;
3748                default: vassert(0);
3749             }
3750             addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
3751                                           res, arg, size, False));
3752             return res;
3753          }
3754          case Iop_Cls8x8:
3755          case Iop_Cls16x4:
3756          case Iop_Cls32x2: {
3757             HReg res = newVRegD(env);
3758             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3759             UInt size = 0;
3760             switch(e->Iex.Binop.op) {
3761                case Iop_Cls8x8: size = 0; break;
3762                case Iop_Cls16x4: size = 1; break;
3763                case Iop_Cls32x2: size = 2; break;
3764                default: vassert(0);
3765             }
3766             addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
3767                                           res, arg, size, False));
3768             return res;
3769          }
3770          case Iop_FtoI32Sx2_RZ: {
3771             HReg res = newVRegD(env);
3772             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3773             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
3774                                           res, arg, 2, False));
3775             return res;
3776          }
3777          case Iop_FtoI32Ux2_RZ: {
3778             HReg res = newVRegD(env);
3779             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3780             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
3781                                           res, arg, 2, False));
3782             return res;
3783          }
3784          case Iop_I32StoFx2: {
3785             HReg res = newVRegD(env);
3786             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3787             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
3788                                           res, arg, 2, False));
3789             return res;
3790          }
3791          case Iop_I32UtoFx2: {
3792             HReg res = newVRegD(env);
3793             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3794             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
3795                                           res, arg, 2, False));
3796             return res;
3797          }
3798          case Iop_F32toF16x4: {
3799             HReg res = newVRegD(env);
3800             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3801             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
3802                                           res, arg, 2, False));
3803             return res;
3804          }
3805          case Iop_RecipEst32Fx2: {
3806             HReg res = newVRegD(env);
3807             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3808             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
3809                                           res, argL, 0, False));
3810             return res;
3811          }
3812          case Iop_RecipEst32Ux2: {
3813             HReg res = newVRegD(env);
3814             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3815             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
3816                                           res, argL, 0, False));
3817             return res;
3818          }
3819          case Iop_Abs32Fx2: {
3820             DECLARE_PATTERN(p_vabd_32fx2);
3821             DEFINE_PATTERN(p_vabd_32fx2,
3822                            unop(Iop_Abs32Fx2,
3823                                 binop(Iop_Sub32Fx2,
3824                                       bind(0),
3825                                       bind(1))));
3826             if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
3827                HReg res = newVRegD(env);
3828                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3829                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3830                addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
3831                                               res, argL, argR, 0, False));
3832                return res;
3833             } else {
3834                HReg res = newVRegD(env);
3835                HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3836                addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
3837                                              res, arg, 0, False));
3838                return res;
3839             }
3840          }
3841          case Iop_RSqrtEst32Fx2: {
3842             HReg res = newVRegD(env);
3843             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3844             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
3845                                           res, arg, 0, False));
3846             return res;
3847          }
3848          case Iop_RSqrtEst32Ux2: {
3849             HReg res = newVRegD(env);
3850             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3851             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
3852                                           res, arg, 0, False));
3853             return res;
3854          }
3855          case Iop_Neg32Fx2: {
3856             HReg res = newVRegD(env);
3857             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3858             addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
3859                                           res, arg, 0, False));
3860             return res;
3861          }
3862          case Iop_V128to64:
3863          case Iop_V128HIto64: {
3864             HReg src   = iselNeonExpr(env, e->Iex.Unop.arg);
3865             HReg resLo = newVRegD(env);
3866             HReg resHi = newVRegD(env);
3867             addInstr(env, ARMInstr_VXferQ(False/*!toQ*/, src, resHi, resLo));
3868             return e->Iex.Unop.op == Iop_V128HIto64 ? resHi : resLo;
3869          }
3870          default:
3871             break;
3872       }
3873    } /* if (e->tag == Iex_Unop) */
3874 
3875    if (e->tag == Iex_Triop) {
3876       IRTriop *triop = e->Iex.Triop.details;
3877 
3878       switch (triop->op) {
3879          case Iop_Slice64: {
3880             HReg res = newVRegD(env);
3881             HReg argL = iselNeon64Expr(env, triop->arg2);
3882             HReg argR = iselNeon64Expr(env, triop->arg1);
3883             UInt imm4;
3884             if (triop->arg3->tag != Iex_Const ||
3885                 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
3886                vpanic("ARM target supports Iop_Extract64 with constant "
3887                       "third argument less than 16 only\n");
3888             }
3889             imm4 = triop->arg3->Iex.Const.con->Ico.U8;
3890             if (imm4 >= 8) {
3891                vpanic("ARM target supports Iop_Extract64 with constant "
3892                       "third argument less than 16 only\n");
3893             }
3894             addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
3895                                            res, argL, argR, imm4, False));
3896             return res;
3897          }
3898          case Iop_SetElem8x8:
3899          case Iop_SetElem16x4:
3900          case Iop_SetElem32x2: {
3901             HReg res = newVRegD(env);
3902             HReg dreg = iselNeon64Expr(env, triop->arg1);
3903             HReg arg = iselIntExpr_R(env, triop->arg3);
3904             UInt index, size;
3905             if (triop->arg2->tag != Iex_Const ||
3906                 typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
3907                vpanic("ARM target supports SetElem with constant "
3908                       "second argument only\n");
3909             }
3910             index = triop->arg2->Iex.Const.con->Ico.U8;
3911             switch (triop->op) {
3912                case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
3913                case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
3914                case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
3915                default: vassert(0);
3916             }
3917             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
3918             addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
3919                                            mkARMNRS(ARMNRS_Scalar, res, index),
3920                                            mkARMNRS(ARMNRS_Reg, arg, 0),
3921                                            size, False));
3922             return res;
3923          }
3924          default:
3925             break;
3926       }
3927    }
3928 
3929    /* --------- MULTIPLEX --------- */
3930    if (e->tag == Iex_ITE) { // VFD
3931       HReg rLo, rHi;
3932       HReg res = newVRegD(env);
3933       iselInt64Expr(&rHi, &rLo, env, e);
3934       addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3935       return res;
3936    }
3937 
3938    ppIRExpr(e);
3939    vpanic("iselNeon64Expr");
3940 }
3941 
3942 
iselNeonExpr(ISelEnv * env,const IRExpr * e)3943 static HReg iselNeonExpr ( ISelEnv* env, const IRExpr* e )
3944 {
3945    HReg r;
3946    vassert(env->hwcaps & VEX_HWCAPS_ARM_NEON);
3947    r = iselNeonExpr_wrk( env, e );
3948    vassert(hregClass(r) == HRcVec128);
3949    vassert(hregIsVirtual(r));
3950    return r;
3951 }
3952 
3953 /* DO NOT CALL THIS DIRECTLY */
iselNeonExpr_wrk(ISelEnv * env,const IRExpr * e)3954 static HReg iselNeonExpr_wrk ( ISelEnv* env, const IRExpr* e )
3955 {
3956    IRType ty = typeOfIRExpr(env->type_env, e);
3957    MatchInfo mi;
3958    vassert(e);
3959    vassert(ty == Ity_V128);
3960 
3961    if (e->tag == Iex_RdTmp) {
3962       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3963    }
3964 
3965    if (e->tag == Iex_Const) {
3966       /* At the moment there should be no 128-bit constants in IR for ARM
3967          generated during disassemble. They are represented as Iop_64HLtoV128
3968          binary operation and are handled among binary ops. */
3969       /* But zero can be created by valgrind internal optimizer */
3970       if (e->Iex.Const.con->Ico.V128 == 0x0000) {
3971          HReg res = newVRegV(env);
3972          addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 0)));
3973          return res;
3974       }
3975       if (e->Iex.Const.con->Ico.V128 == 0xFFFF) {
3976          HReg res = newVRegV(env);
3977          addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 255)));
3978          return res;
3979       }
3980       ppIRExpr(e);
3981       vpanic("128-bit constant is not implemented");
3982    }
3983 
3984    if (e->tag == Iex_Load) {
3985       HReg res = newVRegV(env);
3986       ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
3987       vassert(ty == Ity_V128);
3988       addInstr(env, ARMInstr_NLdStQ(True, res, am));
3989       return res;
3990    }
3991 
3992    if (e->tag == Iex_Get) {
3993       HReg addr = newVRegI(env);
3994       HReg res = newVRegV(env);
3995       vassert(ty == Ity_V128);
3996       addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
3997       addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
3998       return res;
3999    }
4000 
4001    if (e->tag == Iex_Unop) {
4002       switch (e->Iex.Unop.op) {
4003          case Iop_NotV128: {
4004             DECLARE_PATTERN(p_veqz_8x16);
4005             DECLARE_PATTERN(p_veqz_16x8);
4006             DECLARE_PATTERN(p_veqz_32x4);
4007             DECLARE_PATTERN(p_vcge_8sx16);
4008             DECLARE_PATTERN(p_vcge_16sx8);
4009             DECLARE_PATTERN(p_vcge_32sx4);
4010             DECLARE_PATTERN(p_vcge_8ux16);
4011             DECLARE_PATTERN(p_vcge_16ux8);
4012             DECLARE_PATTERN(p_vcge_32ux4);
4013             DEFINE_PATTERN(p_veqz_8x16,
4014                   unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
4015             DEFINE_PATTERN(p_veqz_16x8,
4016                   unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
4017             DEFINE_PATTERN(p_veqz_32x4,
4018                   unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
4019             DEFINE_PATTERN(p_vcge_8sx16,
4020                   unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
4021             DEFINE_PATTERN(p_vcge_16sx8,
4022                   unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
4023             DEFINE_PATTERN(p_vcge_32sx4,
4024                   unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
4025             DEFINE_PATTERN(p_vcge_8ux16,
4026                   unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
4027             DEFINE_PATTERN(p_vcge_16ux8,
4028                   unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
4029             DEFINE_PATTERN(p_vcge_32ux4,
4030                   unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
4031             if (matchIRExpr(&mi, p_veqz_8x16, e)) {
4032                HReg res = newVRegV(env);
4033                HReg arg = iselNeonExpr(env, mi.bindee[0]);
4034                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
4035                return res;
4036             } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
4037                HReg res = newVRegV(env);
4038                HReg arg = iselNeonExpr(env, mi.bindee[0]);
4039                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
4040                return res;
4041             } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
4042                HReg res = newVRegV(env);
4043                HReg arg = iselNeonExpr(env, mi.bindee[0]);
4044                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
4045                return res;
4046             } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
4047                HReg res = newVRegV(env);
4048                HReg argL = iselNeonExpr(env, mi.bindee[0]);
4049                HReg argR = iselNeonExpr(env, mi.bindee[1]);
4050                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4051                                               res, argL, argR, 0, True));
4052                return res;
4053             } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
4054                HReg res = newVRegV(env);
4055                HReg argL = iselNeonExpr(env, mi.bindee[0]);
4056                HReg argR = iselNeonExpr(env, mi.bindee[1]);
4057                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4058                                               res, argL, argR, 1, True));
4059                return res;
4060             } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
4061                HReg res = newVRegV(env);
4062                HReg argL = iselNeonExpr(env, mi.bindee[0]);
4063                HReg argR = iselNeonExpr(env, mi.bindee[1]);
4064                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4065                                               res, argL, argR, 2, True));
4066                return res;
4067             } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
4068                HReg res = newVRegV(env);
4069                HReg argL = iselNeonExpr(env, mi.bindee[0]);
4070                HReg argR = iselNeonExpr(env, mi.bindee[1]);
4071                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4072                                               res, argL, argR, 0, True));
4073                return res;
4074             } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
4075                HReg res = newVRegV(env);
4076                HReg argL = iselNeonExpr(env, mi.bindee[0]);
4077                HReg argR = iselNeonExpr(env, mi.bindee[1]);
4078                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4079                                               res, argL, argR, 1, True));
4080                return res;
4081             } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
4082                HReg res = newVRegV(env);
4083                HReg argL = iselNeonExpr(env, mi.bindee[0]);
4084                HReg argR = iselNeonExpr(env, mi.bindee[1]);
4085                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4086                                               res, argL, argR, 2, True));
4087                return res;
4088             } else {
4089                HReg res = newVRegV(env);
4090                HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4091                addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
4092                return res;
4093             }
4094          }
4095          case Iop_Dup8x16:
4096          case Iop_Dup16x8:
4097          case Iop_Dup32x4: {
4098             HReg res, arg;
4099             UInt size;
4100             DECLARE_PATTERN(p_vdup_8x16);
4101             DECLARE_PATTERN(p_vdup_16x8);
4102             DECLARE_PATTERN(p_vdup_32x4);
4103             DEFINE_PATTERN(p_vdup_8x16,
4104                   unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
4105             DEFINE_PATTERN(p_vdup_16x8,
4106                   unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
4107             DEFINE_PATTERN(p_vdup_32x4,
4108                   unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
4109             if (matchIRExpr(&mi, p_vdup_8x16, e)) {
4110                UInt index;
4111                UInt imm4;
4112                if (mi.bindee[1]->tag == Iex_Const &&
4113                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4114                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4115                   imm4 = (index << 1) + 1;
4116                   if (index < 8) {
4117                      res = newVRegV(env);
4118                      arg = iselNeon64Expr(env, mi.bindee[0]);
4119                      addInstr(env, ARMInstr_NUnaryS(
4120                                       ARMneon_VDUP,
4121                                       mkARMNRS(ARMNRS_Reg, res, 0),
4122                                       mkARMNRS(ARMNRS_Scalar, arg, index),
4123                                       imm4, True
4124                              ));
4125                      return res;
4126                   }
4127                }
4128             } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
4129                UInt index;
4130                UInt imm4;
4131                if (mi.bindee[1]->tag == Iex_Const &&
4132                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4133                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4134                   imm4 = (index << 2) + 2;
4135                   if (index < 4) {
4136                      res = newVRegV(env);
4137                      arg = iselNeon64Expr(env, mi.bindee[0]);
4138                      addInstr(env, ARMInstr_NUnaryS(
4139                                       ARMneon_VDUP,
4140                                       mkARMNRS(ARMNRS_Reg, res, 0),
4141                                       mkARMNRS(ARMNRS_Scalar, arg, index),
4142                                       imm4, True
4143                              ));
4144                      return res;
4145                   }
4146                }
4147             } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
4148                UInt index;
4149                UInt imm4;
4150                if (mi.bindee[1]->tag == Iex_Const &&
4151                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4152                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4153                   imm4 = (index << 3) + 4;
4154                   if (index < 2) {
4155                      res = newVRegV(env);
4156                      arg = iselNeon64Expr(env, mi.bindee[0]);
4157                      addInstr(env, ARMInstr_NUnaryS(
4158                                       ARMneon_VDUP,
4159                                       mkARMNRS(ARMNRS_Reg, res, 0),
4160                                       mkARMNRS(ARMNRS_Scalar, arg, index),
4161                                       imm4, True
4162                              ));
4163                      return res;
4164                   }
4165                }
4166             }
4167             arg = iselIntExpr_R(env, e->Iex.Unop.arg);
4168             res = newVRegV(env);
4169             switch (e->Iex.Unop.op) {
4170                case Iop_Dup8x16: size = 0; break;
4171                case Iop_Dup16x8: size = 1; break;
4172                case Iop_Dup32x4: size = 2; break;
4173                default: vassert(0);
4174             }
4175             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
4176             return res;
4177          }
4178          case Iop_Abs8x16:
4179          case Iop_Abs16x8:
4180          case Iop_Abs32x4: {
4181             HReg res = newVRegV(env);
4182             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4183             UInt size = 0;
4184             switch(e->Iex.Binop.op) {
4185                case Iop_Abs8x16: size = 0; break;
4186                case Iop_Abs16x8: size = 1; break;
4187                case Iop_Abs32x4: size = 2; break;
4188                default: vassert(0);
4189             }
4190             addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
4191             return res;
4192          }
4193          case Iop_Reverse8sIn64_x2:
4194          case Iop_Reverse16sIn64_x2:
4195          case Iop_Reverse32sIn64_x2: {
4196             HReg res = newVRegV(env);
4197             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4198             UInt size = 0;
4199             switch(e->Iex.Binop.op) {
4200                case Iop_Reverse8sIn64_x2: size = 0; break;
4201                case Iop_Reverse16sIn64_x2: size = 1; break;
4202                case Iop_Reverse32sIn64_x2: size = 2; break;
4203                default: vassert(0);
4204             }
4205             addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
4206                                           res, arg, size, True));
4207             return res;
4208          }
4209          case Iop_Reverse8sIn32_x4:
4210          case Iop_Reverse16sIn32_x4: {
4211             HReg res = newVRegV(env);
4212             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4213             UInt size = 0;
4214             switch(e->Iex.Binop.op) {
4215                case Iop_Reverse8sIn32_x4: size = 0; break;
4216                case Iop_Reverse16sIn32_x4: size = 1; break;
4217                default: vassert(0);
4218             }
4219             addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
4220                                           res, arg, size, True));
4221             return res;
4222          }
4223          case Iop_Reverse8sIn16_x8: {
4224             HReg res = newVRegV(env);
4225             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4226             UInt size = 0;
4227             addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
4228                                           res, arg, size, True));
4229             return res;
4230          }
4231          case Iop_CmpNEZ64x2: {
4232             HReg x_lsh = newVRegV(env);
4233             HReg x_rsh = newVRegV(env);
4234             HReg lsh_amt = newVRegV(env);
4235             HReg rsh_amt = newVRegV(env);
4236             HReg zero = newVRegV(env);
4237             HReg tmp = newVRegV(env);
4238             HReg tmp2 = newVRegV(env);
4239             HReg res = newVRegV(env);
4240             HReg x = newVRegV(env);
4241             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4242             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
4243             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
4244             addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
4245             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
4246             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4247                                            rsh_amt, zero, lsh_amt, 2, True));
4248             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4249                                           x_lsh, x, lsh_amt, 3, True));
4250             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4251                                           x_rsh, x, rsh_amt, 3, True));
4252             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4253                                            tmp, x_lsh, x_rsh, 0, True));
4254             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4255                                            res, tmp, x, 0, True));
4256             return res;
4257          }
4258          case Iop_CmpNEZ8x16:
4259          case Iop_CmpNEZ16x8:
4260          case Iop_CmpNEZ32x4: {
4261             HReg res = newVRegV(env);
4262             HReg tmp = newVRegV(env);
4263             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4264             UInt size;
4265             switch (e->Iex.Unop.op) {
4266                case Iop_CmpNEZ8x16: size = 0; break;
4267                case Iop_CmpNEZ16x8: size = 1; break;
4268                case Iop_CmpNEZ32x4: size = 2; break;
4269                default: vassert(0);
4270             }
4271             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
4272             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
4273             return res;
4274          }
4275          case Iop_Widen8Uto16x8:
4276          case Iop_Widen16Uto32x4:
4277          case Iop_Widen32Uto64x2: {
4278             HReg res = newVRegV(env);
4279             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4280             UInt size;
4281             switch (e->Iex.Unop.op) {
4282                case Iop_Widen8Uto16x8:  size = 0; break;
4283                case Iop_Widen16Uto32x4: size = 1; break;
4284                case Iop_Widen32Uto64x2: size = 2; break;
4285                default: vassert(0);
4286             }
4287             addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
4288                                           res, arg, size, True));
4289             return res;
4290          }
4291          case Iop_Widen8Sto16x8:
4292          case Iop_Widen16Sto32x4:
4293          case Iop_Widen32Sto64x2: {
4294             HReg res = newVRegV(env);
4295             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4296             UInt size;
4297             switch (e->Iex.Unop.op) {
4298                case Iop_Widen8Sto16x8:  size = 0; break;
4299                case Iop_Widen16Sto32x4: size = 1; break;
4300                case Iop_Widen32Sto64x2: size = 2; break;
4301                default: vassert(0);
4302             }
4303             addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4304                                           res, arg, size, True));
4305             return res;
4306          }
4307          case Iop_PwAddL8Sx16:
4308          case Iop_PwAddL16Sx8:
4309          case Iop_PwAddL32Sx4: {
4310             HReg res = newVRegV(env);
4311             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4312             UInt size = 0;
4313             switch(e->Iex.Binop.op) {
4314                case Iop_PwAddL8Sx16: size = 0; break;
4315                case Iop_PwAddL16Sx8: size = 1; break;
4316                case Iop_PwAddL32Sx4: size = 2; break;
4317                default: vassert(0);
4318             }
4319             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4320                                           res, arg, size, True));
4321             return res;
4322          }
4323          case Iop_PwAddL8Ux16:
4324          case Iop_PwAddL16Ux8:
4325          case Iop_PwAddL32Ux4: {
4326             HReg res = newVRegV(env);
4327             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4328             UInt size = 0;
4329             switch(e->Iex.Binop.op) {
4330                case Iop_PwAddL8Ux16: size = 0; break;
4331                case Iop_PwAddL16Ux8: size = 1; break;
4332                case Iop_PwAddL32Ux4: size = 2; break;
4333                default: vassert(0);
4334             }
4335             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4336                                           res, arg, size, True));
4337             return res;
4338          }
4339          case Iop_Cnt8x16: {
4340             HReg res = newVRegV(env);
4341             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4342             UInt size = 0;
4343             addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4344             return res;
4345          }
4346          case Iop_Clz8x16:
4347          case Iop_Clz16x8:
4348          case Iop_Clz32x4: {
4349             HReg res = newVRegV(env);
4350             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4351             UInt size = 0;
4352             switch(e->Iex.Binop.op) {
4353                case Iop_Clz8x16: size = 0; break;
4354                case Iop_Clz16x8: size = 1; break;
4355                case Iop_Clz32x4: size = 2; break;
4356                default: vassert(0);
4357             }
4358             addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4359             return res;
4360          }
4361          case Iop_Cls8x16:
4362          case Iop_Cls16x8:
4363          case Iop_Cls32x4: {
4364             HReg res = newVRegV(env);
4365             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4366             UInt size = 0;
4367             switch(e->Iex.Binop.op) {
4368                case Iop_Cls8x16: size = 0; break;
4369                case Iop_Cls16x8: size = 1; break;
4370                case Iop_Cls32x4: size = 2; break;
4371                default: vassert(0);
4372             }
4373             addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4374             return res;
4375          }
4376          case Iop_FtoI32Sx4_RZ: {
4377             HReg res = newVRegV(env);
4378             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4379             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4380                                           res, arg, 2, True));
4381             return res;
4382          }
4383          case Iop_FtoI32Ux4_RZ: {
4384             HReg res = newVRegV(env);
4385             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4386             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4387                                           res, arg, 2, True));
4388             return res;
4389          }
4390          case Iop_I32StoFx4: {
4391             HReg res = newVRegV(env);
4392             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4393             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4394                                           res, arg, 2, True));
4395             return res;
4396          }
4397          case Iop_I32UtoFx4: {
4398             HReg res = newVRegV(env);
4399             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4400             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4401                                           res, arg, 2, True));
4402             return res;
4403          }
4404          case Iop_F16toF32x4: {
4405             HReg res = newVRegV(env);
4406             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4407             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4408                                           res, arg, 2, True));
4409             return res;
4410          }
4411          case Iop_RecipEst32Fx4: {
4412             HReg res = newVRegV(env);
4413             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4414             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4415                                           res, argL, 0, True));
4416             return res;
4417          }
4418          case Iop_RecipEst32Ux4: {
4419             HReg res = newVRegV(env);
4420             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4421             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4422                                           res, argL, 0, True));
4423             return res;
4424          }
4425          case Iop_Abs32Fx4: {
4426             HReg res = newVRegV(env);
4427             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4428             addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4429                                           res, argL, 0, True));
4430             return res;
4431          }
4432          case Iop_RSqrtEst32Fx4: {
4433             HReg res = newVRegV(env);
4434             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4435             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4436                                           res, argL, 0, True));
4437             return res;
4438          }
4439          case Iop_RSqrtEst32Ux4: {
4440             HReg res = newVRegV(env);
4441             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4442             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4443                                           res, argL, 0, True));
4444             return res;
4445          }
4446          case Iop_Neg32Fx4: {
4447             HReg res = newVRegV(env);
4448             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4449             addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4450                                           res, arg, 0, True));
4451             return res;
4452          }
4453          /* ... */
4454          default:
4455             break;
4456       }
4457    }
4458 
4459    if (e->tag == Iex_Binop) {
4460       switch (e->Iex.Binop.op) {
         case Iop_64HLtoV128: {
            /* Build a 128-bit value from two 64-bit halves (arg1 is
               selected as dHi, arg2 as dLo in the fallback below).
               Three immediate-based shortcuts are tried first; if none
               applies, control falls through to the general path. */
            /* Try to match into single "VMOV reg, imm" instruction */
            /* The shortcuts only apply when both halves are I64
               constants carrying the same value. */
            if (e->Iex.Binop.arg1->tag == Iex_Const &&
                e->Iex.Binop.arg2->tag == Iex_Const &&
                typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
                e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
                           e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
               ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
               /* Shortcut 1: the constant converts directly to an
                  ARMNImm (non-NULL result), so one NeonImm suffices. */
               ARMNImm *imm = Imm64_to_ARMNImm(imm64);
               if (imm) {
                  HReg res = newVRegV(env);
                  addInstr(env, ARMInstr_NeonImm(res, imm));
                  return res;
               }
               /* Shortcut 2: upper 32 bits are zero and the value with
                  its low word replicated into the high word converts to
                  an ARMNImm.  Load that immediate and AND it with a mask
                  immediate.
                  NOTE(review): ARMNImm_TI(9,0x0f) presumably yields a
                  mask keeping only the low 32 bits of each 64-bit half,
                  and "type < 10" presumably excludes immediate kinds for
                  which this trick is invalid -- confirm against the
                  ARMNImm definitions.
                  The three vregs are allocated before the type check, so
                  they stay unused if the check fails. */
               if ((imm64 >> 32) == 0LL &&
                   (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
                  HReg tmp1 = newVRegV(env);
                  HReg tmp2 = newVRegV(env);
                  HReg res = newVRegV(env);
                  if (imm->type < 10) {
                     addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
                     addInstr(env, ARMInstr_NeonImm(tmp2, imm));
                     addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
                                                    res, tmp1, tmp2, 4, True));
                     return res;
                  }
               }
               /* Shortcut 3: mirror image of shortcut 2 -- lower 32 bits
                  are zero, the high word is replicated downwards, and
                  the mask immediate is ARMNImm_TI(9,0xf0). */
               if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
                   (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
                  HReg tmp1 = newVRegV(env);
                  HReg tmp2 = newVRegV(env);
                  HReg res = newVRegV(env);
                  if (imm->type < 10) {
                     addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
                     addInstr(env, ARMInstr_NeonImm(tmp2, imm));
                     addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
                                                    res, tmp1, tmp2, 4, True));
                     return res;
                  }
               }
            }
            /* Does not match "VMOV Reg, Imm" form.  We'll have to do
               it the slow way. */
            /* General path: select each half with iselNeon64Expr (arg1
               first, then arg2) and combine them with one VXferQ. */
            HReg dHi = iselNeon64Expr(env, e->Iex.Binop.arg1);
            HReg dLo = iselNeon64Expr(env, e->Iex.Binop.arg2);
            HReg res = newVRegV(env);
            addInstr(env, ARMInstr_VXferQ(True/*toQ*/, res, dHi, dLo));
            return res;
         }
4511          case Iop_AndV128: {
4512             HReg res = newVRegV(env);
4513             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4514             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4515             addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4516                                            res, argL, argR, 4, True));
4517             return res;
4518          }
4519          case Iop_OrV128: {
4520             HReg res = newVRegV(env);
4521             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4522             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4523             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4524                                            res, argL, argR, 4, True));
4525             return res;
4526          }
4527          case Iop_XorV128: {
4528             HReg res = newVRegV(env);
4529             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4530             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4531             addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4532                                            res, argL, argR, 4, True));
4533             return res;
4534          }
4535          case Iop_Add8x16:
4536          case Iop_Add16x8:
4537          case Iop_Add32x4:
4538          case Iop_Add64x2: {
4539             /*
4540             FIXME: remove this if not used
4541             DECLARE_PATTERN(p_vrhadd_32sx4);
4542             ULong one = (1LL << 32) | 1LL;
4543             DEFINE_PATTERN(p_vrhadd_32sx4,
4544                   binop(Iop_Add32x4,
4545                         binop(Iop_Add32x4,
4546                               binop(Iop_SarN32x4,
4547                                     bind(0),
4548                                     mkU8(1)),
4549                               binop(Iop_SarN32x4,
4550                                     bind(1),
4551                                     mkU8(1))),
4552                         binop(Iop_SarN32x4,
4553                               binop(Iop_Add32x4,
4554                                     binop(Iop_Add32x4,
4555                                           binop(Iop_AndV128,
4556                                                 bind(0),
4557                                                 mkU128(one)),
4558                                           binop(Iop_AndV128,
4559                                                 bind(1),
4560                                                 mkU128(one))),
4561                                     mkU128(one)),
4562                               mkU8(1))));
4563             */
4564             HReg res = newVRegV(env);
4565             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4566             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4567             UInt size;
4568             switch (e->Iex.Binop.op) {
4569                case Iop_Add8x16: size = 0; break;
4570                case Iop_Add16x8: size = 1; break;
4571                case Iop_Add32x4: size = 2; break;
4572                case Iop_Add64x2: size = 3; break;
4573                default:
4574                   ppIROp(e->Iex.Binop.op);
4575                   vpanic("Illegal element size in VADD");
4576             }
4577             addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
4578                                            res, argL, argR, size, True));
4579             return res;
4580          }
4581          case Iop_RecipStep32Fx4: {
4582             HReg res = newVRegV(env);
4583             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4584             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4585             UInt size = 0;
4586             addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4587                                            res, argL, argR, size, True));
4588             return res;
4589          }
4590          case Iop_RSqrtStep32Fx4: {
4591             HReg res = newVRegV(env);
4592             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4593             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4594             UInt size = 0;
4595             addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4596                                            res, argL, argR, size, True));
4597             return res;
4598          }
4599 
4600          // These 6 verified 18 Apr 2013
4601          case Iop_InterleaveEvenLanes8x16:
4602          case Iop_InterleaveOddLanes8x16:
4603          case Iop_InterleaveEvenLanes16x8:
4604          case Iop_InterleaveOddLanes16x8:
4605          case Iop_InterleaveEvenLanes32x4:
4606          case Iop_InterleaveOddLanes32x4: {
4607             HReg rD   = newVRegV(env);
4608             HReg rM   = newVRegV(env);
4609             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4610             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4611             UInt size;
4612             Bool resRd;  // is the result in rD or rM ?
4613             switch (e->Iex.Binop.op) {
4614                case Iop_InterleaveOddLanes8x16:  resRd = False; size = 0; break;
4615                case Iop_InterleaveEvenLanes8x16: resRd = True;  size = 0; break;
4616                case Iop_InterleaveOddLanes16x8:  resRd = False; size = 1; break;
4617                case Iop_InterleaveEvenLanes16x8: resRd = True;  size = 1; break;
4618                case Iop_InterleaveOddLanes32x4:  resRd = False; size = 2; break;
4619                case Iop_InterleaveEvenLanes32x4: resRd = True;  size = 2; break;
4620                default: vassert(0);
4621             }
4622             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4623             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4624             addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, True));
4625             return resRd ? rD : rM;
4626          }
4627 
4628          // These 6 verified 18 Apr 2013
4629          case Iop_InterleaveHI8x16:
4630          case Iop_InterleaveLO8x16:
4631          case Iop_InterleaveHI16x8:
4632          case Iop_InterleaveLO16x8:
4633          case Iop_InterleaveHI32x4:
4634          case Iop_InterleaveLO32x4: {
4635             HReg rD   = newVRegV(env);
4636             HReg rM   = newVRegV(env);
4637             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4638             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4639             UInt size;
4640             Bool resRd;  // is the result in rD or rM ?
4641             switch (e->Iex.Binop.op) {
4642                case Iop_InterleaveHI8x16: resRd = False; size = 0; break;
4643                case Iop_InterleaveLO8x16: resRd = True;  size = 0; break;
4644                case Iop_InterleaveHI16x8: resRd = False; size = 1; break;
4645                case Iop_InterleaveLO16x8: resRd = True;  size = 1; break;
4646                case Iop_InterleaveHI32x4: resRd = False; size = 2; break;
4647                case Iop_InterleaveLO32x4: resRd = True;  size = 2; break;
4648                default: vassert(0);
4649             }
4650             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4651             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4652             addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, True));
4653             return resRd ? rD : rM;
4654          }
4655 
4656          // These 6 verified 18 Apr 2013
4657          case Iop_CatOddLanes8x16:
4658          case Iop_CatEvenLanes8x16:
4659          case Iop_CatOddLanes16x8:
4660          case Iop_CatEvenLanes16x8:
4661          case Iop_CatOddLanes32x4:
4662          case Iop_CatEvenLanes32x4: {
4663             HReg rD   = newVRegV(env);
4664             HReg rM   = newVRegV(env);
4665             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4666             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4667             UInt size;
4668             Bool resRd;  // is the result in rD or rM ?
4669             switch (e->Iex.Binop.op) {
4670                case Iop_CatOddLanes8x16:  resRd = False; size = 0; break;
4671                case Iop_CatEvenLanes8x16: resRd = True;  size = 0; break;
4672                case Iop_CatOddLanes16x8:  resRd = False; size = 1; break;
4673                case Iop_CatEvenLanes16x8: resRd = True;  size = 1; break;
4674                case Iop_CatOddLanes32x4:  resRd = False; size = 2; break;
4675                case Iop_CatEvenLanes32x4: resRd = True;  size = 2; break;
4676                default: vassert(0);
4677             }
4678             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4679             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4680             addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, True));
4681             return resRd ? rD : rM;
4682          }
4683 
4684          case Iop_QAdd8Ux16:
4685          case Iop_QAdd16Ux8:
4686          case Iop_QAdd32Ux4:
4687          case Iop_QAdd64Ux2: {
4688             HReg res = newVRegV(env);
4689             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4690             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4691             UInt size;
4692             switch (e->Iex.Binop.op) {
4693                case Iop_QAdd8Ux16: size = 0; break;
4694                case Iop_QAdd16Ux8: size = 1; break;
4695                case Iop_QAdd32Ux4: size = 2; break;
4696                case Iop_QAdd64Ux2: size = 3; break;
4697                default:
4698                   ppIROp(e->Iex.Binop.op);
4699                   vpanic("Illegal element size in VQADDU");
4700             }
4701             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
4702                                            res, argL, argR, size, True));
4703             return res;
4704          }
4705          case Iop_QAdd8Sx16:
4706          case Iop_QAdd16Sx8:
4707          case Iop_QAdd32Sx4:
4708          case Iop_QAdd64Sx2: {
4709             HReg res = newVRegV(env);
4710             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4711             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4712             UInt size;
4713             switch (e->Iex.Binop.op) {
4714                case Iop_QAdd8Sx16: size = 0; break;
4715                case Iop_QAdd16Sx8: size = 1; break;
4716                case Iop_QAdd32Sx4: size = 2; break;
4717                case Iop_QAdd64Sx2: size = 3; break;
4718                default:
4719                   ppIROp(e->Iex.Binop.op);
4720                   vpanic("Illegal element size in VQADDS");
4721             }
4722             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
4723                                            res, argL, argR, size, True));
4724             return res;
4725          }
4726          case Iop_Sub8x16:
4727          case Iop_Sub16x8:
4728          case Iop_Sub32x4:
4729          case Iop_Sub64x2: {
4730             HReg res = newVRegV(env);
4731             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4732             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4733             UInt size;
4734             switch (e->Iex.Binop.op) {
4735                case Iop_Sub8x16: size = 0; break;
4736                case Iop_Sub16x8: size = 1; break;
4737                case Iop_Sub32x4: size = 2; break;
4738                case Iop_Sub64x2: size = 3; break;
4739                default:
4740                   ppIROp(e->Iex.Binop.op);
4741                   vpanic("Illegal element size in VSUB");
4742             }
4743             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4744                                            res, argL, argR, size, True));
4745             return res;
4746          }
4747          case Iop_QSub8Ux16:
4748          case Iop_QSub16Ux8:
4749          case Iop_QSub32Ux4:
4750          case Iop_QSub64Ux2: {
4751             HReg res = newVRegV(env);
4752             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4753             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4754             UInt size;
4755             switch (e->Iex.Binop.op) {
4756                case Iop_QSub8Ux16: size = 0; break;
4757                case Iop_QSub16Ux8: size = 1; break;
4758                case Iop_QSub32Ux4: size = 2; break;
4759                case Iop_QSub64Ux2: size = 3; break;
4760                default:
4761                   ppIROp(e->Iex.Binop.op);
4762                   vpanic("Illegal element size in VQSUBU");
4763             }
4764             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
4765                                            res, argL, argR, size, True));
4766             return res;
4767          }
4768          case Iop_QSub8Sx16:
4769          case Iop_QSub16Sx8:
4770          case Iop_QSub32Sx4:
4771          case Iop_QSub64Sx2: {
4772             HReg res = newVRegV(env);
4773             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4774             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4775             UInt size;
4776             switch (e->Iex.Binop.op) {
4777                case Iop_QSub8Sx16: size = 0; break;
4778                case Iop_QSub16Sx8: size = 1; break;
4779                case Iop_QSub32Sx4: size = 2; break;
4780                case Iop_QSub64Sx2: size = 3; break;
4781                default:
4782                   ppIROp(e->Iex.Binop.op);
4783                   vpanic("Illegal element size in VQSUBS");
4784             }
4785             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
4786                                            res, argL, argR, size, True));
4787             return res;
4788          }
4789          case Iop_Max8Ux16:
4790          case Iop_Max16Ux8:
4791          case Iop_Max32Ux4: {
4792             HReg res = newVRegV(env);
4793             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4794             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4795             UInt size;
4796             switch (e->Iex.Binop.op) {
4797                case Iop_Max8Ux16: size = 0; break;
4798                case Iop_Max16Ux8: size = 1; break;
4799                case Iop_Max32Ux4: size = 2; break;
4800                default: vpanic("Illegal element size in VMAXU");
4801             }
4802             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
4803                                            res, argL, argR, size, True));
4804             return res;
4805          }
4806          case Iop_Max8Sx16:
4807          case Iop_Max16Sx8:
4808          case Iop_Max32Sx4: {
4809             HReg res = newVRegV(env);
4810             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4811             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4812             UInt size;
4813             switch (e->Iex.Binop.op) {
4814                case Iop_Max8Sx16: size = 0; break;
4815                case Iop_Max16Sx8: size = 1; break;
4816                case Iop_Max32Sx4: size = 2; break;
4817                default: vpanic("Illegal element size in VMAXU");
4818             }
4819             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
4820                                            res, argL, argR, size, True));
4821             return res;
4822          }
4823          case Iop_Min8Ux16:
4824          case Iop_Min16Ux8:
4825          case Iop_Min32Ux4: {
4826             HReg res = newVRegV(env);
4827             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4828             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4829             UInt size;
4830             switch (e->Iex.Binop.op) {
4831                case Iop_Min8Ux16: size = 0; break;
4832                case Iop_Min16Ux8: size = 1; break;
4833                case Iop_Min32Ux4: size = 2; break;
4834                default: vpanic("Illegal element size in VMAXU");
4835             }
4836             addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
4837                                            res, argL, argR, size, True));
4838             return res;
4839          }
4840          case Iop_Min8Sx16:
4841          case Iop_Min16Sx8:
4842          case Iop_Min32Sx4: {
4843             HReg res = newVRegV(env);
4844             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4845             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4846             UInt size;
4847             switch (e->Iex.Binop.op) {
4848                case Iop_Min8Sx16: size = 0; break;
4849                case Iop_Min16Sx8: size = 1; break;
4850                case Iop_Min32Sx4: size = 2; break;
4851                default: vpanic("Illegal element size in VMAXU");
4852             }
4853             addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
4854                                            res, argL, argR, size, True));
4855             return res;
4856          }
4857          case Iop_Sar8x16:
4858          case Iop_Sar16x8:
4859          case Iop_Sar32x4:
4860          case Iop_Sar64x2: {
4861             HReg res = newVRegV(env);
4862             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4863             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4864             HReg argR2 = newVRegV(env);
4865             HReg zero = newVRegV(env);
4866             UInt size;
4867             switch (e->Iex.Binop.op) {
4868                case Iop_Sar8x16: size = 0; break;
4869                case Iop_Sar16x8: size = 1; break;
4870                case Iop_Sar32x4: size = 2; break;
4871                case Iop_Sar64x2: size = 3; break;
4872                default: vassert(0);
4873             }
4874             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4875             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4876                                            argR2, zero, argR, size, True));
4877             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4878                                           res, argL, argR2, size, True));
4879             return res;
4880          }
4881          case Iop_Sal8x16:
4882          case Iop_Sal16x8:
4883          case Iop_Sal32x4:
4884          case Iop_Sal64x2: {
4885             HReg res = newVRegV(env);
4886             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4887             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4888             UInt size;
4889             switch (e->Iex.Binop.op) {
4890                case Iop_Sal8x16: size = 0; break;
4891                case Iop_Sal16x8: size = 1; break;
4892                case Iop_Sal32x4: size = 2; break;
4893                case Iop_Sal64x2: size = 3; break;
4894                default: vassert(0);
4895             }
4896             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4897                                           res, argL, argR, size, True));
4898             return res;
4899          }
4900          case Iop_Shr8x16:
4901          case Iop_Shr16x8:
4902          case Iop_Shr32x4:
4903          case Iop_Shr64x2: {
4904             HReg res = newVRegV(env);
4905             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4906             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4907             HReg argR2 = newVRegV(env);
4908             HReg zero = newVRegV(env);
4909             UInt size;
4910             switch (e->Iex.Binop.op) {
4911                case Iop_Shr8x16: size = 0; break;
4912                case Iop_Shr16x8: size = 1; break;
4913                case Iop_Shr32x4: size = 2; break;
4914                case Iop_Shr64x2: size = 3; break;
4915                default: vassert(0);
4916             }
4917             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4918             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4919                                            argR2, zero, argR, size, True));
4920             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4921                                           res, argL, argR2, size, True));
4922             return res;
4923          }
4924          case Iop_Shl8x16:
4925          case Iop_Shl16x8:
4926          case Iop_Shl32x4:
4927          case Iop_Shl64x2: {
4928             HReg res = newVRegV(env);
4929             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4930             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4931             UInt size;
4932             switch (e->Iex.Binop.op) {
4933                case Iop_Shl8x16: size = 0; break;
4934                case Iop_Shl16x8: size = 1; break;
4935                case Iop_Shl32x4: size = 2; break;
4936                case Iop_Shl64x2: size = 3; break;
4937                default: vassert(0);
4938             }
4939             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4940                                           res, argL, argR, size, True));
4941             return res;
4942          }
4943          case Iop_QShl8x16:
4944          case Iop_QShl16x8:
4945          case Iop_QShl32x4:
4946          case Iop_QShl64x2: {
4947             HReg res = newVRegV(env);
4948             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4949             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4950             UInt size;
4951             switch (e->Iex.Binop.op) {
4952                case Iop_QShl8x16: size = 0; break;
4953                case Iop_QShl16x8: size = 1; break;
4954                case Iop_QShl32x4: size = 2; break;
4955                case Iop_QShl64x2: size = 3; break;
4956                default: vassert(0);
4957             }
4958             addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
4959                                           res, argL, argR, size, True));
4960             return res;
4961          }
4962          case Iop_QSal8x16:
4963          case Iop_QSal16x8:
4964          case Iop_QSal32x4:
4965          case Iop_QSal64x2: {
4966             HReg res = newVRegV(env);
4967             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4968             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4969             UInt size;
4970             switch (e->Iex.Binop.op) {
4971                case Iop_QSal8x16: size = 0; break;
4972                case Iop_QSal16x8: size = 1; break;
4973                case Iop_QSal32x4: size = 2; break;
4974                case Iop_QSal64x2: size = 3; break;
4975                default: vassert(0);
4976             }
4977             addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
4978                                           res, argL, argR, size, True));
4979             return res;
4980          }
4981          case Iop_QShlNsatUU8x16:
4982          case Iop_QShlNsatUU16x8:
4983          case Iop_QShlNsatUU32x4:
4984          case Iop_QShlNsatUU64x2: {
4985             HReg res = newVRegV(env);
4986             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4987             UInt size, imm;
4988             if (e->Iex.Binop.arg2->tag != Iex_Const ||
4989                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4990                vpanic("ARM target supports Iop_QShlNsatUUAxB with constant "
4991                       "second argument only\n");
4992             }
4993             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4994             switch (e->Iex.Binop.op) {
4995                case Iop_QShlNsatUU8x16: size = 8 | imm; break;
4996                case Iop_QShlNsatUU16x8: size = 16 | imm; break;
4997                case Iop_QShlNsatUU32x4: size = 32 | imm; break;
4998                case Iop_QShlNsatUU64x2: size = 64 | imm; break;
4999                default: vassert(0);
5000             }
5001             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
5002                                           res, argL, size, True));
5003             return res;
5004          }
5005          case Iop_QShlNsatSU8x16:
5006          case Iop_QShlNsatSU16x8:
5007          case Iop_QShlNsatSU32x4:
5008          case Iop_QShlNsatSU64x2: {
5009             HReg res = newVRegV(env);
5010             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5011             UInt size, imm;
5012             if (e->Iex.Binop.arg2->tag != Iex_Const ||
5013                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5014                vpanic("ARM target supports Iop_QShlNsatSUAxB with constant "
5015                       "second argument only\n");
5016             }
5017             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5018             switch (e->Iex.Binop.op) {
5019                case Iop_QShlNsatSU8x16: size = 8 | imm; break;
5020                case Iop_QShlNsatSU16x8: size = 16 | imm; break;
5021                case Iop_QShlNsatSU32x4: size = 32 | imm; break;
5022                case Iop_QShlNsatSU64x2: size = 64 | imm; break;
5023                default: vassert(0);
5024             }
5025             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
5026                                           res, argL, size, True));
5027             return res;
5028          }
5029          case Iop_QShlNsatSS8x16:
5030          case Iop_QShlNsatSS16x8:
5031          case Iop_QShlNsatSS32x4:
5032          case Iop_QShlNsatSS64x2: {
5033             HReg res = newVRegV(env);
5034             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5035             UInt size, imm;
5036             if (e->Iex.Binop.arg2->tag != Iex_Const ||
5037                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5038                vpanic("ARM target supports Iop_QShlNsatSSAxB with constant "
5039                       "second argument only\n");
5040             }
5041             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5042             switch (e->Iex.Binop.op) {
5043                case Iop_QShlNsatSS8x16: size = 8 | imm; break;
5044                case Iop_QShlNsatSS16x8: size = 16 | imm; break;
5045                case Iop_QShlNsatSS32x4: size = 32 | imm; break;
5046                case Iop_QShlNsatSS64x2: size = 64 | imm; break;
5047                default: vassert(0);
5048             }
5049             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
5050                                           res, argL, size, True));
5051             return res;
5052          }
5053          case Iop_ShrN8x16:
5054          case Iop_ShrN16x8:
5055          case Iop_ShrN32x4:
5056          case Iop_ShrN64x2: {
5057             HReg res = newVRegV(env);
5058             HReg tmp = newVRegV(env);
5059             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5060             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
5061             HReg argR2 = newVRegI(env);
5062             UInt size;
5063             switch (e->Iex.Binop.op) {
5064                case Iop_ShrN8x16: size = 0; break;
5065                case Iop_ShrN16x8: size = 1; break;
5066                case Iop_ShrN32x4: size = 2; break;
5067                case Iop_ShrN64x2: size = 3; break;
5068                default: vassert(0);
5069             }
5070             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
5071             addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
5072                                           tmp, argR2, 0, True));
5073             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5074                                           res, argL, tmp, size, True));
5075             return res;
5076          }
5077          case Iop_ShlN8x16:
5078          case Iop_ShlN16x8:
5079          case Iop_ShlN32x4:
5080          case Iop_ShlN64x2: {
5081             HReg res = newVRegV(env);
5082             HReg tmp = newVRegV(env);
5083             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5084             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
5085             UInt size;
5086             switch (e->Iex.Binop.op) {
5087                case Iop_ShlN8x16: size = 0; break;
5088                case Iop_ShlN16x8: size = 1; break;
5089                case Iop_ShlN32x4: size = 2; break;
5090                case Iop_ShlN64x2: size = 3; break;
5091                default: vassert(0);
5092             }
5093             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
5094             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5095                                           res, argL, tmp, size, True));
5096             return res;
5097          }
5098          case Iop_SarN8x16:
5099          case Iop_SarN16x8:
5100          case Iop_SarN32x4:
5101          case Iop_SarN64x2: {
5102             HReg res = newVRegV(env);
5103             HReg tmp = newVRegV(env);
5104             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5105             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
5106             HReg argR2 = newVRegI(env);
5107             UInt size;
5108             switch (e->Iex.Binop.op) {
5109                case Iop_SarN8x16: size = 0; break;
5110                case Iop_SarN16x8: size = 1; break;
5111                case Iop_SarN32x4: size = 2; break;
5112                case Iop_SarN64x2: size = 3; break;
5113                default: vassert(0);
5114             }
5115             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
5116             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
5117             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
5118                                           res, argL, tmp, size, True));
5119             return res;
5120          }
5121          case Iop_CmpGT8Ux16:
5122          case Iop_CmpGT16Ux8:
5123          case Iop_CmpGT32Ux4: {
5124             HReg res = newVRegV(env);
5125             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5126             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5127             UInt size;
5128             switch (e->Iex.Binop.op) {
5129                case Iop_CmpGT8Ux16: size = 0; break;
5130                case Iop_CmpGT16Ux8: size = 1; break;
5131                case Iop_CmpGT32Ux4: size = 2; break;
5132                default: vassert(0);
5133             }
5134             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
5135                                            res, argL, argR, size, True));
5136             return res;
5137          }
5138          case Iop_CmpGT8Sx16:
5139          case Iop_CmpGT16Sx8:
5140          case Iop_CmpGT32Sx4: {
5141             HReg res = newVRegV(env);
5142             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5143             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5144             UInt size;
5145             switch (e->Iex.Binop.op) {
5146                case Iop_CmpGT8Sx16: size = 0; break;
5147                case Iop_CmpGT16Sx8: size = 1; break;
5148                case Iop_CmpGT32Sx4: size = 2; break;
5149                default: vassert(0);
5150             }
5151             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
5152                                            res, argL, argR, size, True));
5153             return res;
5154          }
5155          case Iop_CmpEQ8x16:
5156          case Iop_CmpEQ16x8:
5157          case Iop_CmpEQ32x4: {
5158             HReg res = newVRegV(env);
5159             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5160             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5161             UInt size;
5162             switch (e->Iex.Binop.op) {
5163                case Iop_CmpEQ8x16: size = 0; break;
5164                case Iop_CmpEQ16x8: size = 1; break;
5165                case Iop_CmpEQ32x4: size = 2; break;
5166                default: vassert(0);
5167             }
5168             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
5169                                            res, argL, argR, size, True));
5170             return res;
5171          }
5172          case Iop_Mul8x16:
5173          case Iop_Mul16x8:
5174          case Iop_Mul32x4: {
5175             HReg res = newVRegV(env);
5176             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5177             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5178             UInt size = 0;
5179             switch(e->Iex.Binop.op) {
5180                case Iop_Mul8x16: size = 0; break;
5181                case Iop_Mul16x8: size = 1; break;
5182                case Iop_Mul32x4: size = 2; break;
5183                default: vassert(0);
5184             }
5185             addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
5186                                            res, argL, argR, size, True));
5187             return res;
5188          }
5189          case Iop_Mull8Ux8:
5190          case Iop_Mull16Ux4:
5191          case Iop_Mull32Ux2: {
5192             HReg res = newVRegV(env);
5193             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5194             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5195             UInt size = 0;
5196             switch(e->Iex.Binop.op) {
5197                case Iop_Mull8Ux8: size = 0; break;
5198                case Iop_Mull16Ux4: size = 1; break;
5199                case Iop_Mull32Ux2: size = 2; break;
5200                default: vassert(0);
5201             }
5202             addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
5203                                            res, argL, argR, size, True));
5204             return res;
5205          }
5206 
5207          case Iop_Mull8Sx8:
5208          case Iop_Mull16Sx4:
5209          case Iop_Mull32Sx2: {
5210             HReg res = newVRegV(env);
5211             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5212             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5213             UInt size = 0;
5214             switch(e->Iex.Binop.op) {
5215                case Iop_Mull8Sx8: size = 0; break;
5216                case Iop_Mull16Sx4: size = 1; break;
5217                case Iop_Mull32Sx2: size = 2; break;
5218                default: vassert(0);
5219             }
5220             addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5221                                            res, argL, argR, size, True));
5222             return res;
5223          }
5224 
5225          case Iop_QDMulHi16Sx8:
5226          case Iop_QDMulHi32Sx4: {
5227             HReg res = newVRegV(env);
5228             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5229             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5230             UInt size = 0;
5231             switch(e->Iex.Binop.op) {
5232                case Iop_QDMulHi16Sx8: size = 1; break;
5233                case Iop_QDMulHi32Sx4: size = 2; break;
5234                default: vassert(0);
5235             }
5236             addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5237                                            res, argL, argR, size, True));
5238             return res;
5239          }
5240 
5241          case Iop_QRDMulHi16Sx8:
5242          case Iop_QRDMulHi32Sx4: {
5243             HReg res = newVRegV(env);
5244             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5245             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5246             UInt size = 0;
5247             switch(e->Iex.Binop.op) {
5248                case Iop_QRDMulHi16Sx8: size = 1; break;
5249                case Iop_QRDMulHi32Sx4: size = 2; break;
5250                default: vassert(0);
5251             }
5252             addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5253                                            res, argL, argR, size, True));
5254             return res;
5255          }
5256 
5257          case Iop_QDMull16Sx4:
5258          case Iop_QDMull32Sx2: {
5259             HReg res = newVRegV(env);
5260             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5261             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5262             UInt size = 0;
5263             switch(e->Iex.Binop.op) {
5264                case Iop_QDMull16Sx4: size = 1; break;
5265                case Iop_QDMull32Sx2: size = 2; break;
5266                default: vassert(0);
5267             }
5268             addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5269                                            res, argL, argR, size, True));
5270             return res;
5271          }
5272          case Iop_PolynomialMul8x16: {
5273             HReg res = newVRegV(env);
5274             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5275             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5276             UInt size = 0;
5277             addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5278                                            res, argL, argR, size, True));
5279             return res;
5280          }
5281          case Iop_Max32Fx4: {
5282             HReg res = newVRegV(env);
5283             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5284             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5285             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5286                                            res, argL, argR, 2, True));
5287             return res;
5288          }
5289          case Iop_Min32Fx4: {
5290             HReg res = newVRegV(env);
5291             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5292             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5293             addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5294                                            res, argL, argR, 2, True));
5295             return res;
5296          }
5297          case Iop_PwMax32Fx4: {
5298             HReg res = newVRegV(env);
5299             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5300             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5301             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5302                                            res, argL, argR, 2, True));
5303             return res;
5304          }
5305          case Iop_PwMin32Fx4: {
5306             HReg res = newVRegV(env);
5307             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5308             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5309             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5310                                            res, argL, argR, 2, True));
5311             return res;
5312          }
5313          case Iop_CmpGT32Fx4: {
5314             HReg res = newVRegV(env);
5315             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5316             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5317             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5318                                            res, argL, argR, 2, True));
5319             return res;
5320          }
5321          case Iop_CmpGE32Fx4: {
5322             HReg res = newVRegV(env);
5323             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5324             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5325             addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5326                                            res, argL, argR, 2, True));
5327             return res;
5328          }
5329          case Iop_CmpEQ32Fx4: {
5330             HReg res = newVRegV(env);
5331             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5332             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5333             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5334                                            res, argL, argR, 2, True));
5335             return res;
5336          }
5337 
5338          case Iop_PolynomialMull8x8: {
5339             HReg res = newVRegV(env);
5340             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5341             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5342             UInt size = 0;
5343             addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5344                                            res, argL, argR, size, True));
5345             return res;
5346          }
5347          case Iop_F32ToFixed32Ux4_RZ:
5348          case Iop_F32ToFixed32Sx4_RZ:
5349          case Iop_Fixed32UToF32x4_RN:
5350          case Iop_Fixed32SToF32x4_RN: {
5351             HReg res = newVRegV(env);
5352             HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5353             ARMNeonUnOp op;
5354             UInt imm6;
5355             if (e->Iex.Binop.arg2->tag != Iex_Const ||
5356                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5357                   vpanic("ARM supports FP <-> Fixed conversion with constant "
5358                          "second argument less than 33 only\n");
5359             }
5360             imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5361             vassert(imm6 <= 32 && imm6 > 0);
5362             imm6 = 64 - imm6;
5363             switch(e->Iex.Binop.op) {
5364                case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5365                case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5366                case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5367                case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5368                default: vassert(0);
5369             }
5370             addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5371             return res;
5372          }
5373          /*
5374          FIXME remove if not used
5375          case Iop_VDup8x16:
5376          case Iop_VDup16x8:
5377          case Iop_VDup32x4: {
5378             HReg res = newVRegV(env);
5379             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5380             UInt imm4;
5381             UInt index;
5382             if (e->Iex.Binop.arg2->tag != Iex_Const ||
5383                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5384                   vpanic("ARM supports Iop_VDup with constant "
5385                          "second argument less than 16 only\n");
5386             }
5387             index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5388             switch(e->Iex.Binop.op) {
5389                case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5390                case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5391                case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5392                default: vassert(0);
5393             }
5394             if (imm4 >= 16) {
5395                vpanic("ARM supports Iop_VDup with constant "
5396                       "second argument less than 16 only\n");
5397             }
5398             addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5399                                           res, argL, imm4, True));
5400             return res;
5401          }
5402          */
5403          case Iop_PwAdd8x16:
5404          case Iop_PwAdd16x8:
5405          case Iop_PwAdd32x4: {
5406             HReg res = newVRegV(env);
5407             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5408             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5409             UInt size = 0;
5410             switch(e->Iex.Binop.op) {
5411                case Iop_PwAdd8x16: size = 0; break;
5412                case Iop_PwAdd16x8: size = 1; break;
5413                case Iop_PwAdd32x4: size = 2; break;
5414                default: vassert(0);
5415             }
5416             addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5417                                            res, argL, argR, size, True));
5418             return res;
5419          }
5420          /* ... */
5421          default:
5422             break;
5423       }
5424    }
5425 
5426    if (e->tag == Iex_Triop) {
5427       IRTriop *triop = e->Iex.Triop.details;
5428 
5429       switch (triop->op) {
5430          case Iop_SliceV128: {
5431             HReg res = newVRegV(env);
5432             HReg argL = iselNeonExpr(env, triop->arg2);
5433             HReg argR = iselNeonExpr(env, triop->arg1);
5434             UInt imm4;
5435             if (triop->arg3->tag != Iex_Const ||
5436                 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
5437                vpanic("ARM target supports Iop_ExtractV128 with constant "
5438                       "third argument less than 16 only\n");
5439             }
5440             imm4 = triop->arg3->Iex.Const.con->Ico.U8;
5441             if (imm4 >= 16) {
5442                vpanic("ARM target supports Iop_ExtractV128 with constant "
5443                       "third argument less than 16 only\n");
5444             }
5445             addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5446                                            res, argL, argR, imm4, True));
5447             return res;
5448          }
5449          case Iop_Mul32Fx4:
5450          case Iop_Sub32Fx4:
5451          case Iop_Add32Fx4: {
5452             HReg res = newVRegV(env);
5453             HReg argL = iselNeonExpr(env, triop->arg2);
5454             HReg argR = iselNeonExpr(env, triop->arg3);
5455             UInt size = 0;
5456             ARMNeonBinOp op = ARMneon_INVALID;
5457             switch (triop->op) {
5458                case Iop_Mul32Fx4: op = ARMneon_VMULFP; break;
5459                case Iop_Sub32Fx4: op = ARMneon_VSUBFP; break;
5460                case Iop_Add32Fx4: op = ARMneon_VADDFP; break;
5461                default: vassert(0);
5462             }
5463             addInstr(env, ARMInstr_NBinary(op, res, argL, argR, size, True));
5464             return res;
5465          }
5466          default:
5467             break;
5468       }
5469    }
5470 
5471    if (e->tag == Iex_ITE) { // VFD
5472       ARMCondCode cc;
5473       HReg r1  = iselNeonExpr(env, e->Iex.ITE.iftrue);
5474       HReg r0  = iselNeonExpr(env, e->Iex.ITE.iffalse);
5475       HReg dst = newVRegV(env);
5476       addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, r1, 4, True));
5477       cc = iselCondCode(env, e->Iex.ITE.cond);
5478       addInstr(env, ARMInstr_NCMovQ(cc ^ 1, dst, r0));
5479       return dst;
5480    }
5481 
5482   /* neon_expr_bad: */
5483    ppIRExpr(e);
5484    vpanic("iselNeonExpr_wrk");
5485 }
5486 
5487 /*---------------------------------------------------------*/
5488 /*--- ISEL: Floating point expressions (64 bit)         ---*/
5489 /*---------------------------------------------------------*/
5490 
/* Compute a 64-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg must not be
   changed by subsequent code emitted by the caller.  Note that the reg
   returned here is always virtual and of class HRcFlt64: iselDblExpr
   asserts both properties before returning.  */
5495 
iselDblExpr(ISelEnv * env,IRExpr * e)5496 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5497 {
5498    HReg r = iselDblExpr_wrk( env, e );
5499 #  if 0
5500    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5501 #  endif
5502    vassert(hregClass(r) == HRcFlt64);
5503    vassert(hregIsVirtual(r));
5504    return r;
5505 }
5506 
5507 /* DO NOT CALL THIS DIRECTLY */
iselDblExpr_wrk(ISelEnv * env,IRExpr * e)5508 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
5509 {
5510    IRType ty = typeOfIRExpr(env->type_env,e);
5511    vassert(e);
5512    vassert(ty == Ity_F64);
5513 
5514    if (e->tag == Iex_RdTmp) {
5515       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5516    }
5517 
5518    if (e->tag == Iex_Const) {
5519       /* Just handle the zero case. */
5520       IRConst* con = e->Iex.Const.con;
5521       if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
5522          HReg z32 = newVRegI(env);
5523          HReg dst = newVRegD(env);
5524          addInstr(env, ARMInstr_Imm32(z32, 0));
5525          addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
5526          return dst;
5527       }
5528    }
5529 
5530    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5531       ARMAModeV* am;
5532       HReg res = newVRegD(env);
5533       vassert(e->Iex.Load.ty == Ity_F64);
5534       am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5535       addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5536       return res;
5537    }
5538 
5539    if (e->tag == Iex_Get) {
5540       // XXX This won't work if offset > 1020 or is not 0 % 4.
5541       // In which case we'll have to generate more longwinded code.
5542       ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5543       HReg       res = newVRegD(env);
5544       addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5545       return res;
5546    }
5547 
5548    if (e->tag == Iex_Unop) {
5549       switch (e->Iex.Unop.op) {
5550          case Iop_ReinterpI64asF64: {
5551             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5552                return iselNeon64Expr(env, e->Iex.Unop.arg);
5553             } else {
5554                HReg srcHi, srcLo;
5555                HReg dst = newVRegD(env);
5556                iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
5557                addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
5558                return dst;
5559             }
5560          }
5561          case Iop_NegF64: {
5562             HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5563             HReg dst = newVRegD(env);
5564             addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
5565             return dst;
5566          }
5567          case Iop_AbsF64: {
5568             HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5569             HReg dst = newVRegD(env);
5570             addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
5571             return dst;
5572          }
5573          case Iop_F32toF64: {
5574             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5575             HReg dst = newVRegD(env);
5576             addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
5577             return dst;
5578          }
5579          case Iop_I32UtoF64:
5580          case Iop_I32StoF64: {
5581             HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
5582             HReg f32   = newVRegF(env);
5583             HReg dst   = newVRegD(env);
5584             Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
5585             /* VMOV f32, src */
5586             addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
5587             /* FSITOD dst, f32 */
5588             addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
5589                                           dst, f32));
5590             return dst;
5591          }
5592          default:
5593             break;
5594       }
5595    }
5596 
5597    if (e->tag == Iex_Binop) {
5598       switch (e->Iex.Binop.op) {
5599          case Iop_SqrtF64: {
5600             /* first arg is rounding mode; we ignore it. */
5601             HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
5602             HReg dst = newVRegD(env);
5603             addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
5604             return dst;
5605          }
5606          case Iop_RoundF64toInt: {
5607             /* We can only generate this on a >= V8 capable target.  But
5608                that's OK since we should only be asked to generate for V8
5609                capable guests, and we assume here that host == guest. */
5610             if (VEX_ARM_ARCHLEVEL(env->hwcaps) >= 8) {
5611                HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
5612                HReg dst = newVRegD(env);
5613                set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5614                addInstr(env, ARMInstr_VRIntR(True/*isF64*/, dst, src));
5615                set_VFP_rounding_default(env);
5616                return dst;
5617             }
5618             /* not a V8 target, so we can't select insns for this. */
5619             break;
5620          }
5621          case Iop_MaxNumF64:
5622          case Iop_MinNumF64: {
5623             /* Same comments regarding V8 support as for Iop_RoundF64toInt. */
5624             if (VEX_ARM_ARCHLEVEL(env->hwcaps) >= 8) {
5625                HReg srcL  = iselDblExpr(env, e->Iex.Binop.arg1);
5626                HReg srcR  = iselDblExpr(env, e->Iex.Binop.arg2);
5627                HReg dst   = newVRegD(env);
5628                Bool isMax = e->Iex.Binop.op == Iop_MaxNumF64;
5629                addInstr(env, ARMInstr_VMinMaxNum(
5630                                 True/*isF64*/, isMax, dst, srcL, srcR));
5631                return dst;
5632             }
5633             /* not a V8 target, so we can't select insns for this. */
5634             break;
5635          }
5636          default:
5637             break;
5638       }
5639    }
5640 
5641    if (e->tag == Iex_Triop) {
5642       IRTriop *triop = e->Iex.Triop.details;
5643 
5644       switch (triop->op) {
5645          case Iop_DivF64:
5646          case Iop_MulF64:
5647          case Iop_AddF64:
5648          case Iop_SubF64: {
5649             ARMVfpOp op = 0; /*INVALID*/
5650             HReg argL = iselDblExpr(env, triop->arg2);
5651             HReg argR = iselDblExpr(env, triop->arg3);
5652             HReg dst  = newVRegD(env);
5653             switch (triop->op) {
5654                case Iop_DivF64: op = ARMvfp_DIV; break;
5655                case Iop_MulF64: op = ARMvfp_MUL; break;
5656                case Iop_AddF64: op = ARMvfp_ADD; break;
5657                case Iop_SubF64: op = ARMvfp_SUB; break;
5658                default: vassert(0);
5659             }
5660             addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
5661             return dst;
5662          }
5663          default:
5664             break;
5665       }
5666    }
5667 
5668    if (e->tag == Iex_ITE) { // VFD
5669       if (ty == Ity_F64
5670           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
5671          HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
5672          HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
5673          HReg dst = newVRegD(env);
5674          addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, r1));
5675          ARMCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
5676          addInstr(env, ARMInstr_VCMovD(cc ^ 1, dst, r0));
5677          return dst;
5678       }
5679    }
5680 
5681    ppIRExpr(e);
5682    vpanic("iselDblExpr_wrk");
5683 }
5684 
5685 
5686 /*---------------------------------------------------------*/
5687 /*--- ISEL: Floating point expressions (32 bit)         ---*/
5688 /*---------------------------------------------------------*/
5689 
/* Compute a 32-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg must not be
   changed by subsequent code emitted by the caller.  Note that the reg
   returned here is always virtual and of class HRcFlt32: iselFltExpr
   asserts both properties before returning.  */
5694 
iselFltExpr(ISelEnv * env,IRExpr * e)5695 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
5696 {
5697    HReg r = iselFltExpr_wrk( env, e );
5698 #  if 0
5699    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5700 #  endif
5701    vassert(hregClass(r) == HRcFlt32);
5702    vassert(hregIsVirtual(r));
5703    return r;
5704 }
5705 
5706 /* DO NOT CALL THIS DIRECTLY */
iselFltExpr_wrk(ISelEnv * env,IRExpr * e)5707 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
5708 {
5709    IRType ty = typeOfIRExpr(env->type_env,e);
5710    vassert(e);
5711    vassert(ty == Ity_F32);
5712 
5713    if (e->tag == Iex_RdTmp) {
5714       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5715    }
5716 
5717    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5718       ARMAModeV* am;
5719       HReg res = newVRegF(env);
5720       vassert(e->Iex.Load.ty == Ity_F32);
5721       am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5722       addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5723       return res;
5724    }
5725 
5726    if (e->tag == Iex_Get) {
5727       // XXX This won't work if offset > 1020 or is not 0 % 4.
5728       // In which case we'll have to generate more longwinded code.
5729       ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5730       HReg       res = newVRegF(env);
5731       addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5732       return res;
5733    }
5734 
5735    if (e->tag == Iex_Unop) {
5736       switch (e->Iex.Unop.op) {
5737          case Iop_ReinterpI32asF32: {
5738             HReg dst = newVRegF(env);
5739             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5740             addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
5741             return dst;
5742          }
5743          case Iop_NegF32: {
5744             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5745             HReg dst = newVRegF(env);
5746             addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
5747             return dst;
5748          }
5749          case Iop_AbsF32: {
5750             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5751             HReg dst = newVRegF(env);
5752             addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
5753             return dst;
5754          }
5755          default:
5756             break;
5757       }
5758    }
5759 
5760    if (e->tag == Iex_Binop) {
5761       switch (e->Iex.Binop.op) {
5762          case Iop_SqrtF32: {
5763             /* first arg is rounding mode; we ignore it. */
5764             HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
5765             HReg dst = newVRegF(env);
5766             addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
5767             return dst;
5768          }
5769          case Iop_F64toF32: {
5770             HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
5771             set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5772             HReg valS = newVRegF(env);
5773             /* FCVTSD valS, valD */
5774             addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
5775             set_VFP_rounding_default(env);
5776             return valS;
5777          }
5778          case Iop_RoundF32toInt: {
5779             /* We can only generate this on a >= V8 capable target.  But
5780                that's OK since we should only be asked to generate for V8
5781                capable guests, and we assume here that host == guest. */
5782             if (VEX_ARM_ARCHLEVEL(env->hwcaps) >= 8) {
5783                HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
5784                HReg dst = newVRegF(env);
5785                set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5786                addInstr(env, ARMInstr_VRIntR(False/*!isF64*/, dst, src));
5787                set_VFP_rounding_default(env);
5788                return dst;
5789             }
5790             /* not a V8 target, so we can't select insns for this. */
5791             break;
5792          }
5793          case Iop_MaxNumF32:
5794          case Iop_MinNumF32: {
5795             /* Same comments regarding V8 support as for Iop_RoundF32toInt. */
5796             if (VEX_ARM_ARCHLEVEL(env->hwcaps) >= 8) {
5797                HReg srcL  = iselFltExpr(env, e->Iex.Binop.arg1);
5798                HReg srcR  = iselFltExpr(env, e->Iex.Binop.arg2);
5799                HReg dst   = newVRegF(env);
5800                Bool isMax = e->Iex.Binop.op == Iop_MaxNumF32;
5801                addInstr(env, ARMInstr_VMinMaxNum(
5802                                 False/*!isF64*/, isMax, dst, srcL, srcR));
5803                return dst;
5804             }
5805             /* not a V8 target, so we can't select insns for this. */
5806             break;
5807          }
5808          default:
5809             break;
5810       }
5811    }
5812 
5813    if (e->tag == Iex_Triop) {
5814       IRTriop *triop = e->Iex.Triop.details;
5815 
5816       switch (triop->op) {
5817          case Iop_DivF32:
5818          case Iop_MulF32:
5819          case Iop_AddF32:
5820          case Iop_SubF32: {
5821             ARMVfpOp op = 0; /*INVALID*/
5822             HReg argL = iselFltExpr(env, triop->arg2);
5823             HReg argR = iselFltExpr(env, triop->arg3);
5824             HReg dst  = newVRegF(env);
5825             switch (triop->op) {
5826                case Iop_DivF32: op = ARMvfp_DIV; break;
5827                case Iop_MulF32: op = ARMvfp_MUL; break;
5828                case Iop_AddF32: op = ARMvfp_ADD; break;
5829                case Iop_SubF32: op = ARMvfp_SUB; break;
5830                default: vassert(0);
5831             }
5832             addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
5833             return dst;
5834          }
5835          default:
5836             break;
5837       }
5838    }
5839 
5840    if (e->tag == Iex_ITE) { // VFD
5841       if (ty == Ity_F32
5842           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
5843          ARMCondCode cc;
5844          HReg r1  = iselFltExpr(env, e->Iex.ITE.iftrue);
5845          HReg r0  = iselFltExpr(env, e->Iex.ITE.iffalse);
5846          HReg dst = newVRegF(env);
5847          addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, r1));
5848          cc = iselCondCode(env, e->Iex.ITE.cond);
5849          addInstr(env, ARMInstr_VCMovS(cc ^ 1, dst, r0));
5850          return dst;
5851       }
5852    }
5853 
5854    ppIRExpr(e);
5855    vpanic("iselFltExpr_wrk");
5856 }
5857 
5858 
5859 /*---------------------------------------------------------*/
5860 /*--- ISEL: Statements                                  ---*/
5861 /*---------------------------------------------------------*/
5862 
iselStmt(ISelEnv * env,IRStmt * stmt)5863 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
5864 {
5865    if (vex_traceflags & VEX_TRACE_VCODE) {
5866       vex_printf("\n-- ");
5867       ppIRStmt(stmt);
5868       vex_printf("\n");
5869    }
5870    switch (stmt->tag) {
5871 
5872    /* --------- STORE --------- */
5873    /* little-endian write to memory */
5874    case Ist_Store: {
5875       IRType    tya  = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
5876       IRType    tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
5877       IREndness end  = stmt->Ist.Store.end;
5878 
5879       if (tya != Ity_I32 || end != Iend_LE)
5880          goto stmt_fail;
5881 
5882       if (tyd == Ity_I32) {
5883          HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5884          ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5885          addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
5886          return;
5887       }
5888       if (tyd == Ity_I16) {
5889          HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5890          ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
5891          addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
5892                                        False/*!isLoad*/,
5893                                        False/*!isSignedLoad*/, rD, am));
5894          return;
5895       }
5896       if (tyd == Ity_I8) {
5897          HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5898          ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5899          addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am));
5900          return;
5901       }
5902       if (tyd == Ity_I64) {
5903          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5904             HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
5905             ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5906             addInstr(env, ARMInstr_NLdStD(False, dD, am));
5907          } else {
5908             HReg rDhi, rDlo, rA;
5909             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
5910             rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
5911             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDhi,
5912                                           ARMAMode1_RI(rA,4)));
5913             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo,
5914                                           ARMAMode1_RI(rA,0)));
5915          }
5916          return;
5917       }
5918       if (tyd == Ity_F64) {
5919          HReg       dD = iselDblExpr(env, stmt->Ist.Store.data);
5920          ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5921          addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
5922          return;
5923       }
5924       if (tyd == Ity_F32) {
5925          HReg       fD = iselFltExpr(env, stmt->Ist.Store.data);
5926          ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5927          addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
5928          return;
5929       }
5930       if (tyd == Ity_V128) {
5931          HReg       qD = iselNeonExpr(env, stmt->Ist.Store.data);
5932          ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5933          addInstr(env, ARMInstr_NLdStQ(False, qD, am));
5934          return;
5935       }
5936 
5937       break;
5938    }
5939 
5940    /* --------- CONDITIONAL STORE --------- */
5941    /* conditional little-endian write to memory */
5942    case Ist_StoreG: {
5943       IRStoreG* sg   = stmt->Ist.StoreG.details;
5944       IRType    tya  = typeOfIRExpr(env->type_env, sg->addr);
5945       IRType    tyd  = typeOfIRExpr(env->type_env, sg->data);
5946       IREndness end  = sg->end;
5947 
5948       if (tya != Ity_I32 || end != Iend_LE)
5949          goto stmt_fail;
5950 
5951       switch (tyd) {
5952          case Ity_I8:
5953          case Ity_I32: {
5954             HReg        rD = iselIntExpr_R(env, sg->data);
5955             ARMAMode1*  am = iselIntExpr_AMode1(env, sg->addr);
5956             ARMCondCode cc = iselCondCode(env, sg->guard);
5957             addInstr(env, (tyd == Ity_I32 ? ARMInstr_LdSt32 : ARMInstr_LdSt8U)
5958                              (cc, False/*!isLoad*/, rD, am));
5959             return;
5960          }
5961          case Ity_I16: {
5962             HReg        rD = iselIntExpr_R(env, sg->data);
5963             ARMAMode2*  am = iselIntExpr_AMode2(env, sg->addr);
5964             ARMCondCode cc = iselCondCode(env, sg->guard);
5965             addInstr(env, ARMInstr_LdSt16(cc,
5966                                           False/*!isLoad*/,
5967                                           False/*!isSignedLoad*/, rD, am));
5968             return;
5969          }
5970          default:
5971             break;
5972       }
5973       break;
5974    }
5975 
5976    /* --------- CONDITIONAL LOAD --------- */
5977    /* conditional little-endian load from memory */
5978    case Ist_LoadG: {
5979       IRLoadG*  lg   = stmt->Ist.LoadG.details;
5980       IRType    tya  = typeOfIRExpr(env->type_env, lg->addr);
5981       IREndness end  = lg->end;
5982 
5983       if (tya != Ity_I32 || end != Iend_LE)
5984          goto stmt_fail;
5985 
5986       switch (lg->cvt) {
5987          case ILGop_8Uto32:
5988          case ILGop_Ident32: {
5989             HReg        rAlt = iselIntExpr_R(env, lg->alt);
5990             ARMAMode1*  am   = iselIntExpr_AMode1(env, lg->addr);
5991             HReg        rD   = lookupIRTemp(env, lg->dst);
5992             addInstr(env, mk_iMOVds_RR(rD, rAlt));
5993             ARMCondCode cc   = iselCondCode(env, lg->guard);
5994             addInstr(env, (lg->cvt == ILGop_Ident32 ? ARMInstr_LdSt32
5995                                                     : ARMInstr_LdSt8U)
5996                              (cc, True/*isLoad*/, rD, am));
5997             return;
5998          }
5999          case ILGop_16Sto32:
6000          case ILGop_16Uto32:
6001          case ILGop_8Sto32: {
6002             HReg        rAlt = iselIntExpr_R(env, lg->alt);
6003             ARMAMode2*  am   = iselIntExpr_AMode2(env, lg->addr);
6004             HReg        rD   = lookupIRTemp(env, lg->dst);
6005             addInstr(env, mk_iMOVds_RR(rD, rAlt));
6006             ARMCondCode cc   = iselCondCode(env, lg->guard);
6007             if (lg->cvt == ILGop_8Sto32) {
6008                addInstr(env, ARMInstr_Ld8S(cc, rD, am));
6009             } else {
6010                vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32);
6011                Bool sx = lg->cvt == ILGop_16Sto32;
6012                addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am));
6013             }
6014             return;
6015          }
6016          default:
6017             break;
6018       }
6019       break;
6020    }
6021 
6022    /* --------- PUT --------- */
6023    /* write guest state, fixed offset */
6024    case Ist_Put: {
6025        IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
6026 
6027        if (tyd == Ity_I32) {
6028            HReg       rD = iselIntExpr_R(env, stmt->Ist.Put.data);
6029            ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
6030            addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
6031            return;
6032        }
6033        if (tyd == Ity_I64) {
6034           if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6035              HReg addr = newVRegI(env);
6036              HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
6037              addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
6038                                                 stmt->Ist.Put.offset));
6039              addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
6040           } else {
6041              HReg rDhi, rDlo;
6042              ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
6043                                            stmt->Ist.Put.offset + 0);
6044              ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
6045                                            stmt->Ist.Put.offset + 4);
6046              iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
6047              addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
6048                                            rDhi, am4));
6049              addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
6050                                            rDlo, am0));
6051           }
6052           return;
6053        }
6054        if (tyd == Ity_F64) {
6055           // XXX This won't work if offset > 1020 or is not 0 % 4.
6056           // In which case we'll have to generate more longwinded code.
6057           ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
6058           HReg       rD = iselDblExpr(env, stmt->Ist.Put.data);
6059           addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
6060           return;
6061        }
6062        if (tyd == Ity_F32) {
6063           // XXX This won't work if offset > 1020 or is not 0 % 4.
6064           // In which case we'll have to generate more longwinded code.
6065           ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
6066           HReg       rD = iselFltExpr(env, stmt->Ist.Put.data);
6067           addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
6068           return;
6069        }
6070        if (tyd == Ity_V128) {
6071           HReg addr = newVRegI(env);
6072           HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
6073           addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
6074                                        stmt->Ist.Put.offset));
6075           addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
6076           return;
6077        }
6078        break;
6079    }
6080 
6081    /* --------- TMP --------- */
6082    /* assign value to temporary */
6083    case Ist_WrTmp: {
6084       IRTemp tmp = stmt->Ist.WrTmp.tmp;
6085       IRType ty = typeOfIRTemp(env->type_env, tmp);
6086 
6087       if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
6088          ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
6089                                           env, stmt->Ist.WrTmp.data);
6090          HReg     dst  = lookupIRTemp(env, tmp);
6091          addInstr(env, ARMInstr_Mov(dst,ri84));
6092          return;
6093       }
6094       if (ty == Ity_I1) {
6095          /* Here, we are generating a I1 value into a 32 bit register.
6096             Make sure the value in the register is only zero or one,
6097             but no other.  This allows optimisation of the
6098             1Uto32(tmp:I1) case, by making it simply a copy of the
6099             register holding 'tmp'.  The point being that the value in
6100             the register holding 'tmp' can only have been created
6101             here. */
6102          HReg        dst  = lookupIRTemp(env, tmp);
6103          ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
6104          addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
6105          addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
6106          return;
6107       }
6108       if (ty == Ity_I64) {
6109          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6110             HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
6111             HReg dst = lookupIRTemp(env, tmp);
6112             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
6113          } else {
6114             HReg rHi, rLo, dstHi, dstLo;
6115             iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
6116             lookupIRTemp64( &dstHi, &dstLo, env, tmp);
6117             addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
6118             addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
6119          }
6120          return;
6121       }
6122       if (ty == Ity_F64) {
6123          HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
6124          HReg dst = lookupIRTemp(env, tmp);
6125          addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
6126          return;
6127       }
6128       if (ty == Ity_F32) {
6129          HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
6130          HReg dst = lookupIRTemp(env, tmp);
6131          addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
6132          return;
6133       }
6134       if (ty == Ity_V128) {
6135          HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
6136          HReg dst = lookupIRTemp(env, tmp);
6137          addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
6138          return;
6139       }
6140       break;
6141    }
6142 
6143    /* --------- Call to DIRTY helper --------- */
6144    /* call complex ("dirty") helper function */
6145    case Ist_Dirty: {
6146       IRDirty* d = stmt->Ist.Dirty.details;
6147 
6148       /* Figure out the return type, if any. */
6149       IRType retty = Ity_INVALID;
6150       if (d->tmp != IRTemp_INVALID)
6151          retty = typeOfIRTemp(env->type_env, d->tmp);
6152 
6153       Bool retty_ok = False;
6154       switch (retty) {
6155          case Ity_INVALID: /* function doesn't return anything */
6156          case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
6157          case Ity_V128:
6158             retty_ok = True; break;
6159          default:
6160             break;
6161       }
6162       if (!retty_ok)
6163          break; /* will go to stmt_fail: */
6164 
6165       /* Marshal args, do the call, and set the return value to 0x555..555
6166          if this is a conditional call that returns a value and the
6167          call is skipped. */
6168       UInt   addToSp = 0;
6169       RetLoc rloc    = mk_RetLoc_INVALID();
6170       Bool   ok      = doHelperCall( &addToSp, &rloc, env,
6171                                      d->guard, d->cee, retty, d->args );
6172       if (!ok) goto stmt_fail;
6173       vassert(is_sane_RetLoc(rloc));
6174 
6175       /* Now figure out what to do with the returned value, if any. */
6176       switch (retty) {
6177          case Ity_INVALID: {
6178             /* No return value.  Nothing to do. */
6179             vassert(d->tmp == IRTemp_INVALID);
6180             vassert(rloc.pri == RLPri_None);
6181             vassert(addToSp == 0);
6182             return;
6183          }
6184          case Ity_I64: {
6185             vassert(rloc.pri == RLPri_2Int);
6186             vassert(addToSp == 0);
6187             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6188                HReg tmp = lookupIRTemp(env, d->tmp);
6189                addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
6190                                                         hregARM_R0()));
6191             } else {
6192                HReg dstHi, dstLo;
6193                /* The returned value is in r1:r0.  Park it in the
6194                   register-pair associated with tmp. */
6195                lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
6196                addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
6197                addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
6198             }
6199             return;
6200          }
6201          case Ity_I32: case Ity_I16: case Ity_I8: {
6202             vassert(rloc.pri == RLPri_Int);
6203             vassert(addToSp == 0);
6204             /* The returned value is in r0.  Park it in the register
6205                associated with tmp. */
6206             HReg dst = lookupIRTemp(env, d->tmp);
6207             addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
6208             return;
6209          }
6210          case Ity_V128: {
6211             /* The returned value is on the stack, and *retloc tells
6212                us where.  Fish it off the stack and then move the
6213                stack pointer upwards to clear it, as directed by
6214                doHelperCall. */
6215             vassert(rloc.pri == RLPri_V128SpRel);
6216             vassert(rloc.spOff < 256); // else ARMRI84_I84(_,0) can't encode it
6217             vassert(addToSp >= 16);
6218             vassert(addToSp <= 256);
6219             /* Both the stack delta and the offset must be at least 8-aligned.
6220                If that isn't so, doHelperCall() has generated bad code. */
6221             vassert(0 == (rloc.spOff % 8));
6222             vassert(0 == (addToSp % 8));
6223             HReg dst = lookupIRTemp(env, d->tmp);
6224             HReg tmp = newVRegI(env);
6225             HReg sp  = hregARM_R13();
6226             addInstr(env, ARMInstr_Alu(ARMalu_ADD,
6227                                        tmp, sp, ARMRI84_I84(rloc.spOff,0)));
6228             ARMAModeN* am = mkARMAModeN_R(tmp);
6229             /* This load could be done with its effective address 0 % 8,
6230                because that's the best stack alignment that we can be
6231                assured of. */
6232             addInstr(env, ARMInstr_NLdStQ(True/*load*/, dst, am));
6233 
6234             ARMRI84* spAdj
6235                = addToSp == 256 ? ARMRI84_I84(64, 15) // 64 `ror` (15 * 2)
6236                                 : ARMRI84_I84(addToSp, 0);
6237             addInstr(env, ARMInstr_Alu(ARMalu_ADD, sp, sp, spAdj));
6238             return;
6239          }
6240          default:
6241             /*NOTREACHED*/
6242             vassert(0);
6243       }
6244       break;
6245    }
6246 
6247    /* --------- Load Linked and Store Conditional --------- */
6248    case Ist_LLSC: {
6249       if (stmt->Ist.LLSC.storedata == NULL) {
6250          /* LL */
6251          IRTemp res = stmt->Ist.LLSC.result;
6252          IRType ty  = typeOfIRTemp(env->type_env, res);
6253          if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
6254             Int  szB   = 0;
6255             HReg r_dst = lookupIRTemp(env, res);
6256             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6257             switch (ty) {
6258                case Ity_I8:  szB = 1; break;
6259                case Ity_I16: szB = 2; break;
6260                case Ity_I32: szB = 4; break;
6261                default:      vassert(0);
6262             }
6263             addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6264             addInstr(env, ARMInstr_LdrEX(szB));
6265             addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
6266             return;
6267          }
6268          if (ty == Ity_I64) {
6269             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6270             addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6271             addInstr(env, ARMInstr_LdrEX(8));
6272             /* Result is in r3:r2.  On a non-NEON capable CPU, we must
6273                move it into a result register pair.  On a NEON capable
6274                CPU, the result register will be a 64 bit NEON
6275                register, so we must move it there instead. */
6276             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6277                HReg dst = lookupIRTemp(env, res);
6278                addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
6279                                                         hregARM_R2()));
6280             } else {
6281                HReg r_dst_hi, r_dst_lo;
6282                lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
6283                addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
6284                addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
6285             }
6286             return;
6287          }
6288          /*NOTREACHED*/
6289          vassert(0);
6290       } else {
6291          /* SC */
6292          IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
6293          if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
6294             Int  szB = 0;
6295             HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
6296             HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6297             switch (tyd) {
6298                case Ity_I8:  szB = 1; break;
6299                case Ity_I16: szB = 2; break;
6300                case Ity_I32: szB = 4; break;
6301                default:      vassert(0);
6302             }
6303             addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
6304             addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6305             addInstr(env, ARMInstr_StrEX(szB));
6306          } else {
6307             vassert(tyd == Ity_I64);
6308             /* This is really ugly.  There is no is/is-not NEON
6309                decision akin to the case for LL, because iselInt64Expr
6310                fudges this for us, and always gets the result into two
6311                GPRs even if this means moving it from a NEON
6312                register. */
6313             HReg rDhi, rDlo;
6314             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
6315             HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6316             addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
6317             addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
6318             addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6319             addInstr(env, ARMInstr_StrEX(8));
6320          }
6321          /* now r0 is 1 if failed, 0 if success.  Change to IR
6322             conventions (0 is fail, 1 is success).  Also transfer
6323             result to r_res. */
6324          IRTemp   res   = stmt->Ist.LLSC.result;
6325          IRType   ty    = typeOfIRTemp(env->type_env, res);
6326          HReg     r_res = lookupIRTemp(env, res);
6327          ARMRI84* one   = ARMRI84_I84(1,0);
6328          vassert(ty == Ity_I1);
6329          addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
6330          /* And be conservative -- mask off all but the lowest bit */
6331          addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
6332          return;
6333       }
6334       break;
6335    }
6336 
6337    /* --------- MEM FENCE --------- */
6338    case Ist_MBE:
6339       switch (stmt->Ist.MBE.event) {
6340          case Imbe_Fence:
6341             addInstr(env, ARMInstr_MFence());
6342             return;
6343          case Imbe_CancelReservation:
6344             addInstr(env, ARMInstr_CLREX());
6345             return;
6346          default:
6347             break;
6348       }
6349       break;
6350 
6351    /* --------- INSTR MARK --------- */
6352    /* Doesn't generate any executable code ... */
6353    case Ist_IMark:
6354        return;
6355 
6356    /* --------- NO-OP --------- */
6357    case Ist_NoOp:
6358        return;
6359 
6360    /* --------- EXIT --------- */
6361    case Ist_Exit: {
6362       if (stmt->Ist.Exit.dst->tag != Ico_U32)
6363          vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
6364 
6365       ARMCondCode cc     = iselCondCode(env, stmt->Ist.Exit.guard);
6366       ARMAMode1*  amR15T = ARMAMode1_RI(hregARM_R8(),
6367                                         stmt->Ist.Exit.offsIP);
6368 
6369       /* Case: boring transfer to known address */
6370       if (stmt->Ist.Exit.jk == Ijk_Boring
6371           || stmt->Ist.Exit.jk == Ijk_Call
6372           || stmt->Ist.Exit.jk == Ijk_Ret) {
6373          if (env->chainingAllowed) {
6374             /* .. almost always true .. */
6375             /* Skip the event check at the dst if this is a forwards
6376                edge. */
6377             Bool toFastEP
6378                = stmt->Ist.Exit.dst->Ico.U32 > env->max_ga;
6379             if (0) vex_printf("%s", toFastEP ? "Y" : ",");
6380             addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
6381                                            amR15T, cc, toFastEP));
6382          } else {
6383             /* .. very occasionally .. */
6384             /* We can't use chaining, so ask for an assisted transfer,
6385                as that's the only alternative that is allowable. */
6386             HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6387             addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring));
6388          }
6389          return;
6390       }
6391 
6392       /* Case: assisted transfer to arbitrary address */
6393       switch (stmt->Ist.Exit.jk) {
6394          /* Keep this list in sync with that in iselNext below */
6395          case Ijk_ClientReq:
6396          case Ijk_NoDecode:
6397          case Ijk_NoRedir:
6398          case Ijk_Sys_syscall:
6399          case Ijk_InvalICache:
6400          case Ijk_Yield:
6401          {
6402             HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6403             addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
6404                                              stmt->Ist.Exit.jk));
6405             return;
6406          }
6407          default:
6408             break;
6409       }
6410 
6411       /* Do we ever expect to see any other kind? */
6412       goto stmt_fail;
6413    }
6414 
6415    default: break;
6416    }
6417   stmt_fail:
6418    ppIRStmt(stmt);
6419    vpanic("iselStmt");
6420 }
6421 
6422 
6423 /*---------------------------------------------------------*/
6424 /*--- ISEL: Basic block terminators (Nexts)             ---*/
6425 /*---------------------------------------------------------*/
6426 
/* Select instructions for the transfer that ends a superblock.
   'next' is the destination expression, 'jk' the kind of jump, and
   'offsIP' the offset (relative to r8) of the slot handed to the
   exit instructions.  Asserts if 'jk' is a kind that is not
   handled here. */
static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* A plain jump or call to a destination known at translation
      time. */
   if (next->tag == Iex_Const) {
      IRConst* target = next->Iex.Const.con;
      vassert(target->tag == Ico_U32);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         ARMAMode1* amIP = ARMAMode1_RI(hregARM_R8(), offsIP);
         if (!env->chainingAllowed) {
            /* The rare case.  Without chaining, an assisted
               transfer is the only allowable alternative. */
            HReg rTarget = iselIntExpr_R(env, next);
            addInstr(env, ARMInstr_XAssisted(rTarget, amIP, ARMcc_AL,
                                             Ijk_Boring));
            return;
         }
         /* The usual case.  A forwards edge may skip the event
            check at the destination. */
         Bool isFwdEdge = target->Ico.U32 > env->max_ga;
         if (0) vex_printf("%s", isFwdEdge ? "X" : ".");
         addInstr(env, ARMInstr_XDirect(target->Ico.U32,
                                        amIP, ARMcc_AL,
                                        isFwdEdge));
         return;
      }
   }

   switch (jk) {

      /* Jump, call or return to an address that is only known at
         run time.  All three are treated as a boring transfer. */
      case Ijk_Boring:
      case Ijk_Ret:
      case Ijk_Call: {
         HReg       rTarget = iselIntExpr_R(env, next);
         ARMAMode1* amIP    = ARMAMode1_RI(hregARM_R8(), offsIP);
         ARMInstr*  xfer
            = env->chainingAllowed
                 ? ARMInstr_XIndir(rTarget, amIP, ARMcc_AL)
                 : ARMInstr_XAssisted(rTarget, amIP, ARMcc_AL,
                                      Ijk_Boring);
         addInstr(env, xfer);
         return;
      }

      /* Jump kinds that always need an assisted transfer; the kind
         is passed through unchanged.  Keep this list in sync with
         that for Ist_Exit above. */
      case Ijk_ClientReq:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_Sys_syscall:
      case Ijk_InvalICache:
      case Ijk_Yield: {
         HReg       rTarget = iselIntExpr_R(env, next);
         ARMAMode1* amIP    = ARMAMode1_RI(hregARM_R8(), offsIP);
         addInstr(env, ARMInstr_XAssisted(rTarget, amIP, ARMcc_AL, jk));
         return;
      }

      default:
         break;
   }

   /* Not a jump kind we know how to deal with.  Show what it was
      and give up. */
   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}
6510 
6511 
6512 /*---------------------------------------------------------*/
6513 /*--- Insn selector top-level                           ---*/
6514 /*---------------------------------------------------------*/
6515 
6516 /* Translate an entire SB to arm code. */
6517 
iselSB_ARM(const IRSB * bb,VexArch arch_host,const VexArchInfo * archinfo_host,const VexAbiInfo * vbi,Int offs_Host_EvC_Counter,Int offs_Host_EvC_FailAddr,Bool chainingAllowed,Bool addProfInc,Addr max_ga)6518 HInstrArray* iselSB_ARM ( const IRSB* bb,
6519                           VexArch      arch_host,
6520                           const VexArchInfo* archinfo_host,
6521                           const VexAbiInfo*  vbi/*UNUSED*/,
6522                           Int offs_Host_EvC_Counter,
6523                           Int offs_Host_EvC_FailAddr,
6524                           Bool chainingAllowed,
6525                           Bool addProfInc,
6526                           Addr max_ga )
6527 {
6528    Int       i, j;
6529    HReg      hreg, hregHI;
6530    ISelEnv*  env;
6531    UInt      hwcaps_host = archinfo_host->hwcaps;
6532    ARMAMode1 *amCounter, *amFailAddr;
6533 
6534    /* sanity ... */
6535    vassert(arch_host == VexArchARM);
6536 
6537    /* Check that the host's endianness is as expected. */
6538    vassert(archinfo_host->endness == VexEndnessLE);
6539 
6540    /* guard against unexpected space regressions */
6541    vassert(sizeof(ARMInstr) <= 28);
6542 
6543    /* hwcaps should not change from one ISEL call to another. */
6544    arm_hwcaps = hwcaps_host; // JRS 2012 Mar 31: FIXME (RM)
6545 
6546    /* Make up an initial environment to use. */
6547    env = LibVEX_Alloc_inline(sizeof(ISelEnv));
6548    env->vreg_ctr = 0;
6549 
6550    /* Set up output code array. */
6551    env->code = newHInstrArray();
6552 
6553    /* Copy BB's type env. */
6554    env->type_env = bb->tyenv;
6555 
6556    /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
6557       change as we go along. */
6558    env->n_vregmap = bb->tyenv->types_used;
6559    env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
6560    env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
6561 
6562    /* and finally ... */
6563    env->chainingAllowed = chainingAllowed;
6564    env->hwcaps          = hwcaps_host;
6565    env->max_ga          = max_ga;
6566 
6567    /* For each IR temporary, allocate a suitably-kinded virtual
6568       register. */
6569    j = 0;
6570    for (i = 0; i < env->n_vregmap; i++) {
6571       hregHI = hreg = INVALID_HREG;
6572       switch (bb->tyenv->types[i]) {
6573          case Ity_I1:
6574          case Ity_I8:
6575          case Ity_I16:
6576          case Ity_I32:  hreg   = mkHReg(True, HRcInt32, 0, j++); break;
6577          case Ity_I64:
6578             if (hwcaps_host & VEX_HWCAPS_ARM_NEON) {
6579                hreg = mkHReg(True, HRcFlt64, 0, j++);
6580             } else {
6581                hregHI = mkHReg(True, HRcInt32, 0, j++);
6582                hreg   = mkHReg(True, HRcInt32, 0, j++);
6583             }
6584             break;
6585          case Ity_F32:  hreg   = mkHReg(True, HRcFlt32,  0, j++); break;
6586          case Ity_F64:  hreg   = mkHReg(True, HRcFlt64,  0, j++); break;
6587          case Ity_V128: hreg   = mkHReg(True, HRcVec128, 0, j++); break;
6588          default: ppIRType(bb->tyenv->types[i]);
6589                   vpanic("iselBB: IRTemp type");
6590       }
6591       env->vregmap[i]   = hreg;
6592       env->vregmapHI[i] = hregHI;
6593    }
6594    env->vreg_ctr = j;
6595 
6596    /* The very first instruction must be an event check. */
6597    amCounter  = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter);
6598    amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr);
6599    addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr));
6600 
6601    /* Possibly a block counter increment (for profiling).  At this
6602       point we don't know the address of the counter, so just pretend
6603       it is zero.  It will have to be patched later, but before this
6604       translation is used, by a call to LibVEX_patchProfCtr. */
6605    if (addProfInc) {
6606       addInstr(env, ARMInstr_ProfInc());
6607    }
6608 
6609    /* Ok, finally we can iterate over the statements. */
6610    for (i = 0; i < bb->stmts_used; i++)
6611       iselStmt(env, bb->stmts[i]);
6612 
6613    iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
6614 
6615    /* record the number of vregs we used. */
6616    env->code->n_vregs = env->vreg_ctr;
6617    return env->code;
6618 }
6619 
6620 
6621 /*---------------------------------------------------------------*/
6622 /*--- end                                     host_arm_isel.c ---*/
6623 /*---------------------------------------------------------------*/
6624 
6625