
/*---------------------------------------------------------------*/
/*--- begin                                   host_x86_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_x86_defs.h"


/* --------- Registers. --------- */

const RRegUniverse* getRRegUniverse_X86 ( void )
{
   /* The real-register universe is a big constant, so we just want to
      initialise it once. */
   static RRegUniverse rRegUniverse_X86;
   static Bool         rRegUniverse_X86_initted = False;

   /* Handy shorthand, nothing more */
   RRegUniverse* ru = &rRegUniverse_X86;

   /* This isn't thread-safe.  Sigh. */
   if (LIKELY(rRegUniverse_X86_initted))
      return ru;

   RRegUniverse__init(ru);

   /* Add the registers.  The initial segment of this array must be
      those available for allocation by reg-alloc, and those that
      follow are not available for allocation. */
   ru->allocable_start[HRcInt32] = ru->size;
   ru->regs[ru->size++] = hregX86_EBX();
   ru->regs[ru->size++] = hregX86_ESI();
   ru->regs[ru->size++] = hregX86_EDI();
   ru->regs[ru->size++] = hregX86_EAX();
   ru->regs[ru->size++] = hregX86_ECX();
   ru->regs[ru->size++] = hregX86_EDX();
   ru->allocable_end[HRcInt32] = ru->size - 1;

   ru->allocable_start[HRcFlt64] = ru->size;
   ru->regs[ru->size++] = hregX86_FAKE0();
   ru->regs[ru->size++] = hregX86_FAKE1();
   ru->regs[ru->size++] = hregX86_FAKE2();
   ru->regs[ru->size++] = hregX86_FAKE3();
   ru->regs[ru->size++] = hregX86_FAKE4();
   ru->regs[ru->size++] = hregX86_FAKE5();
   ru->allocable_end[HRcFlt64] = ru->size - 1;

   ru->allocable_start[HRcVec128] = ru->size;
   ru->regs[ru->size++] = hregX86_XMM0();
   ru->regs[ru->size++] = hregX86_XMM1();
   ru->regs[ru->size++] = hregX86_XMM2();
   ru->regs[ru->size++] = hregX86_XMM3();
   ru->regs[ru->size++] = hregX86_XMM4();
   ru->regs[ru->size++] = hregX86_XMM5();
   ru->regs[ru->size++] = hregX86_XMM6();
   ru->regs[ru->size++] = hregX86_XMM7();
   ru->allocable_end[HRcVec128] = ru->size - 1;
   ru->allocable = ru->size;

   /* And other regs, not available to the allocator. */
   ru->regs[ru->size++] = hregX86_ESP();
   ru->regs[ru->size++] = hregX86_EBP();

   rRegUniverse_X86_initted = True;

   RRegUniverse__check_is_sane(ru);
   return ru;
}
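
/* For reference, the layout built above is:
      regs[0..5]   allocatable HRcInt32   %ebx %esi %edi %eax %ecx %edx
      regs[6..11]  allocatable HRcFlt64   %fake0 .. %fake5
      regs[12..19] allocatable HRcVec128  %xmm0 .. %xmm7
      regs[20..21] not allocatable        %esp %ebp
   %esp and %ebp are included in the universe (presumably so that
   instructions can still name them) but reg-alloc never hands them
   out. */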


UInt ppHRegX86 ( HReg reg )
{
   Int r;
   static const HChar* ireg32_names[8]
     = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      return ppHReg(reg);
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt32:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 8);
         return vex_printf("%s", ireg32_names[r]);
      case HRcFlt64:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 6);
         return vex_printf("%%fake%d", r);
      case HRcVec128:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 8);
         return vex_printf("%%xmm%d", r);
      default:
         vpanic("ppHRegX86");
   }
}


/* --------- Condition codes, Intel encoding. --------- */

const HChar* showX86CondCode ( X86CondCode cond )
{
   switch (cond) {
      case Xcc_O:      return "o";
      case Xcc_NO:     return "no";
      case Xcc_B:      return "b";
      case Xcc_NB:     return "nb";
      case Xcc_Z:      return "z";
      case Xcc_NZ:     return "nz";
      case Xcc_BE:     return "be";
      case Xcc_NBE:    return "nbe";
      case Xcc_S:      return "s";
      case Xcc_NS:     return "ns";
      case Xcc_P:      return "p";
      case Xcc_NP:     return "np";
      case Xcc_L:      return "l";
      case Xcc_NL:     return "nl";
      case Xcc_LE:     return "le";
      case Xcc_NLE:    return "nle";
      case Xcc_ALWAYS: return "ALWAYS";
      default: vpanic("showX86CondCode");
   }
}


/* --------- X86AMode: memory address expressions. --------- */

X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) {
   X86AMode* am = LibVEX_Alloc_inline(sizeof(X86AMode));
   am->tag = Xam_IR;
   am->Xam.IR.imm = imm32;
   am->Xam.IR.reg = reg;
   return am;
}
X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
   X86AMode* am = LibVEX_Alloc_inline(sizeof(X86AMode));
   am->tag = Xam_IRRS;
   am->Xam.IRRS.imm = imm32;
   am->Xam.IRRS.base = base;
   am->Xam.IRRS.index = indEx;
   am->Xam.IRRS.shift = shift;
   vassert(shift >= 0 && shift <= 3);
   return am;
}
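
/* Illustration: printed via ppX86AMode below,
      X86AMode_IR(0x18, hregX86_EBP())                  shows as 0x18(%ebp)
      X86AMode_IRRS(0, hregX86_EAX(), hregX86_ECX(), 2) shows as 0x0(%eax,%ecx,4)
   i.e. IR means imm(reg), and IRRS means imm(base,index,1<<shift). */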

X86AMode* dopyX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
      case Xam_IRRS:
         return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
                               am->Xam.IRRS.index, am->Xam.IRRS.shift );
      default:
         vpanic("dopyX86AMode");
   }
}

void ppX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         if (am->Xam.IR.imm == 0)
            vex_printf("(");
         else
            vex_printf("0x%x(", am->Xam.IR.imm);
         ppHRegX86(am->Xam.IR.reg);
         vex_printf(")");
         return;
      case Xam_IRRS:
         vex_printf("0x%x(", am->Xam.IRRS.imm);
         ppHRegX86(am->Xam.IRRS.base);
         vex_printf(",");
         ppHRegX86(am->Xam.IRRS.index);
         vex_printf(",%d)", 1 << am->Xam.IRRS.shift);
         return;
      default:
         vpanic("ppX86AMode");
   }
}

static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         addHRegUse(u, HRmRead, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         addHRegUse(u, HRmRead, am->Xam.IRRS.base);
         addHRegUse(u, HRmRead, am->Xam.IRRS.index);
         return;
      default:
         vpanic("addRegUsage_X86AMode");
   }
}

static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         am->Xam.IRRS.base = lookupHRegRemap(m, am->Xam.IRRS.base);
         am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index);
         return;
      default:
         vpanic("mapRegs_X86AMode");
   }
}

/* --------- Operand, which can be reg, immediate or memory. --------- */

X86RMI* X86RMI_Imm ( UInt imm32 ) {
   X86RMI* op         = LibVEX_Alloc_inline(sizeof(X86RMI));
   op->tag            = Xrmi_Imm;
   op->Xrmi.Imm.imm32 = imm32;
   return op;
}
X86RMI* X86RMI_Reg ( HReg reg ) {
   X86RMI* op       = LibVEX_Alloc_inline(sizeof(X86RMI));
   op->tag          = Xrmi_Reg;
   op->Xrmi.Reg.reg = reg;
   return op;
}
X86RMI* X86RMI_Mem ( X86AMode* am ) {
   X86RMI* op      = LibVEX_Alloc_inline(sizeof(X86RMI));
   op->tag         = Xrmi_Mem;
   op->Xrmi.Mem.am = am;
   return op;
}

void ppX86RMI ( X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         vex_printf("$0x%x", op->Xrmi.Imm.imm32);
         return;
      case Xrmi_Reg:
         ppHRegX86(op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         ppX86AMode(op->Xrmi.Mem.am);
         return;
      default:
         vpanic("ppX86RMI");
   }
}

/* An X86RMI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         addHRegUse(u, HRmRead, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         addRegUsage_X86AMode(u, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("addRegUsage_X86RMI");
   }
}

static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         mapRegs_X86AMode(m, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("mapRegs_X86RMI");
   }
}


/* --------- Operand, which can be reg or immediate only. --------- */

X86RI* X86RI_Imm ( UInt imm32 ) {
   X86RI* op         = LibVEX_Alloc_inline(sizeof(X86RI));
   op->tag           = Xri_Imm;
   op->Xri.Imm.imm32 = imm32;
   return op;
}
X86RI* X86RI_Reg ( HReg reg ) {
   X86RI* op       = LibVEX_Alloc_inline(sizeof(X86RI));
   op->tag         = Xri_Reg;
   op->Xri.Reg.reg = reg;
   return op;
}

void ppX86RI ( X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         vex_printf("$0x%x", op->Xri.Imm.imm32);
         return;
      case Xri_Reg:
         ppHRegX86(op->Xri.Reg.reg);
         return;
      default:
         vpanic("ppX86RI");
   }
}

/* An X86RI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         addHRegUse(u, HRmRead, op->Xri.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RI");
   }
}

static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RI");
   }
}


/* --------- Operand, which can be reg or memory only. --------- */

X86RM* X86RM_Reg ( HReg reg ) {
   X86RM* op       = LibVEX_Alloc_inline(sizeof(X86RM));
   op->tag         = Xrm_Reg;
   op->Xrm.Reg.reg = reg;
   return op;
}
X86RM* X86RM_Mem ( X86AMode* am ) {
   X86RM* op      = LibVEX_Alloc_inline(sizeof(X86RM));
   op->tag        = Xrm_Mem;
   op->Xrm.Mem.am = am;
   return op;
}

void ppX86RM ( X86RM* op ) {
   switch (op->tag) {
      case Xrm_Mem:
         ppX86AMode(op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         ppHRegX86(op->Xrm.Reg.reg);
         return;
      default:
         vpanic("ppX86RM");
   }
}

/* Because an X86RM can be both a source or destination operand, we
   have to supply a mode -- pertaining to the operand as a whole --
   indicating how it's being used. */
static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) {
   switch (op->tag) {
      case Xrm_Mem:
         /* Memory is read, written or modified.  So we just want to
            know the regs read by the amode. */
         addRegUsage_X86AMode(u, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         /* reg is read, written or modified.  Add it in the
            appropriate way. */
         addHRegUse(u, mode, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RM");
   }
}
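
/* For example, getRegUsage_X86Instr below passes HRmRead here for
   Xin_Test32's operand (test only reads it); HRmModify would be the
   right mode for an operand that is both read and written. */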

static void mapRegs_X86RM ( HRegRemap* m, X86RM* op )
{
   switch (op->tag) {
      case Xrm_Mem:
         mapRegs_X86AMode(m, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RM");
   }
}


/* --------- Instructions. --------- */

const HChar* showX86UnaryOp ( X86UnaryOp op ) {
   switch (op) {
      case Xun_NOT: return "not";
      case Xun_NEG: return "neg";
      default: vpanic("showX86UnaryOp");
   }
}

const HChar* showX86AluOp ( X86AluOp op ) {
   switch (op) {
      case Xalu_MOV:  return "mov";
      case Xalu_CMP:  return "cmp";
      case Xalu_ADD:  return "add";
      case Xalu_SUB:  return "sub";
      case Xalu_ADC:  return "adc";
      case Xalu_SBB:  return "sbb";
      case Xalu_AND:  return "and";
      case Xalu_OR:   return "or";
      case Xalu_XOR:  return "xor";
      case Xalu_MUL:  return "mul";
      default: vpanic("showX86AluOp");
   }
}

const HChar* showX86ShiftOp ( X86ShiftOp op ) {
   switch (op) {
      case Xsh_SHL: return "shl";
      case Xsh_SHR: return "shr";
      case Xsh_SAR: return "sar";
      default: vpanic("showX86ShiftOp");
   }
}

const HChar* showX86FpOp ( X86FpOp op ) {
   switch (op) {
      case Xfp_ADD:    return "add";
      case Xfp_SUB:    return "sub";
      case Xfp_MUL:    return "mul";
      case Xfp_DIV:    return "div";
      case Xfp_SCALE:  return "scale";
      case Xfp_ATAN:   return "atan";
      case Xfp_YL2X:   return "yl2x";
      case Xfp_YL2XP1: return "yl2xp1";
      case Xfp_PREM:   return "prem";
      case Xfp_PREM1:  return "prem1";
      case Xfp_SQRT:   return "sqrt";
      case Xfp_ABS:    return "abs";
      case Xfp_NEG:    return "chs";
      case Xfp_MOV:    return "mov";
      case Xfp_SIN:    return "sin";
      case Xfp_COS:    return "cos";
      case Xfp_TAN:    return "tan";
      case Xfp_ROUND:  return "round";
      case Xfp_2XM1:   return "2xm1";
      default: vpanic("showX86FpOp");
   }
}

const HChar* showX86SseOp ( X86SseOp op ) {
   switch (op) {
      case Xsse_MOV:      return "mov(?!)";
      case Xsse_ADDF:     return "add";
      case Xsse_SUBF:     return "sub";
      case Xsse_MULF:     return "mul";
      case Xsse_DIVF:     return "div";
      case Xsse_MAXF:     return "max";
      case Xsse_MINF:     return "min";
      case Xsse_CMPEQF:   return "cmpFeq";
      case Xsse_CMPLTF:   return "cmpFlt";
      case Xsse_CMPLEF:   return "cmpFle";
      case Xsse_CMPUNF:   return "cmpFun";
      case Xsse_RCPF:     return "rcp";
      case Xsse_RSQRTF:   return "rsqrt";
      case Xsse_SQRTF:    return "sqrt";
      case Xsse_AND:      return "and";
      case Xsse_OR:       return "or";
      case Xsse_XOR:      return "xor";
      case Xsse_ANDN:     return "andn";
      case Xsse_ADD8:     return "paddb";
      case Xsse_ADD16:    return "paddw";
      case Xsse_ADD32:    return "paddd";
      case Xsse_ADD64:    return "paddq";
      case Xsse_QADD8U:   return "paddusb";
      case Xsse_QADD16U:  return "paddusw";
      case Xsse_QADD8S:   return "paddsb";
      case Xsse_QADD16S:  return "paddsw";
      case Xsse_SUB8:     return "psubb";
      case Xsse_SUB16:    return "psubw";
      case Xsse_SUB32:    return "psubd";
      case Xsse_SUB64:    return "psubq";
      case Xsse_QSUB8U:   return "psubusb";
      case Xsse_QSUB16U:  return "psubusw";
      case Xsse_QSUB8S:   return "psubsb";
      case Xsse_QSUB16S:  return "psubsw";
      case Xsse_MUL16:    return "pmullw";
      case Xsse_MULHI16U: return "pmulhuw";
      case Xsse_MULHI16S: return "pmulhw";
      case Xsse_AVG8U:    return "pavgb";
      case Xsse_AVG16U:   return "pavgw";
      case Xsse_MAX16S:   return "pmaxw";
      case Xsse_MAX8U:    return "pmaxub";
      case Xsse_MIN16S:   return "pminw";
      case Xsse_MIN8U:    return "pminub";
      case Xsse_CMPEQ8:   return "pcmpeqb";
      case Xsse_CMPEQ16:  return "pcmpeqw";
      case Xsse_CMPEQ32:  return "pcmpeqd";
      case Xsse_CMPGT8S:  return "pcmpgtb";
      case Xsse_CMPGT16S: return "pcmpgtw";
      case Xsse_CMPGT32S: return "pcmpgtd";
      case Xsse_SHL16:    return "psllw";
      case Xsse_SHL32:    return "pslld";
      case Xsse_SHL64:    return "psllq";
      case Xsse_SHR16:    return "psrlw";
      case Xsse_SHR32:    return "psrld";
      case Xsse_SHR64:    return "psrlq";
      case Xsse_SAR16:    return "psraw";
      case Xsse_SAR32:    return "psrad";
      case Xsse_PACKSSD:  return "packssdw";
      case Xsse_PACKSSW:  return "packsswb";
      case Xsse_PACKUSW:  return "packuswb";
      case Xsse_UNPCKHB:  return "punpckhb";
      case Xsse_UNPCKHW:  return "punpckhw";
      case Xsse_UNPCKHD:  return "punpckhd";
      case Xsse_UNPCKHQ:  return "punpckhq";
      case Xsse_UNPCKLB:  return "punpcklb";
      case Xsse_UNPCKLW:  return "punpcklw";
      case Xsse_UNPCKLD:  return "punpckld";
      case Xsse_UNPCKLQ:  return "punpcklq";
      default: vpanic("showX86SseOp");
   }
}

X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag            = Xin_Alu32R;
   i->Xin.Alu32R.op  = op;
   i->Xin.Alu32R.src = src;
   i->Xin.Alu32R.dst = dst;
   return i;
}
X86Instr* X86Instr_Alu32M ( X86AluOp op, X86RI* src, X86AMode* dst ) {
   X86Instr* i       = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag            = Xin_Alu32M;
   i->Xin.Alu32M.op  = op;
   i->Xin.Alu32M.src = src;
   i->Xin.Alu32M.dst = dst;
   vassert(op != Xalu_MUL);
   return i;
}
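
/* Note on Xin_Sh32 (and Xin_Sh3232 below): a shift amount of 0 does
   not mean "shift by 0"; it means the shift count is taken from %cl.
   ppX86Instr and getRegUsage_X86Instr below both honour this, the
   latter by adding a read of %ecx in that case. */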
X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) {
   X86Instr* i     = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag          = Xin_Sh32;
   i->Xin.Sh32.op  = op;
   i->Xin.Sh32.src = src;
   i->Xin.Sh32.dst = dst;
   return i;
}
X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) {
   X86Instr* i         = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag              = Xin_Test32;
   i->Xin.Test32.imm32 = imm32;
   i->Xin.Test32.dst   = dst;
   return i;
}
X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag             = Xin_Unary32;
   i->Xin.Unary32.op  = op;
   i->Xin.Unary32.dst = dst;
   return i;
}
X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag             = Xin_Lea32;
   i->Xin.Lea32.am    = am;
   i->Xin.Lea32.dst   = dst;
   return i;
}
X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) {
   X86Instr* i        = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag             = Xin_MulL;
   i->Xin.MulL.syned  = syned;
   i->Xin.MulL.src    = src;
   return i;
}
X86Instr* X86Instr_Div ( Bool syned, X86RM* src ) {
   X86Instr* i      = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag           = Xin_Div;
   i->Xin.Div.syned = syned;
   i->Xin.Div.src   = src;
   return i;
}
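
/* MulL above is the one-operand widening multiply, EDX:EAX := EAX * src;
   Div is the matching divide, dividing EDX:EAX by src and leaving the
   quotient in EAX and the remainder in EDX.  Hence getRegUsage_X86Instr
   below claims both EAX and EDX for them. */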
X86Instr* X86Instr_Sh3232  ( X86ShiftOp op, UInt amt, HReg src, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag            = Xin_Sh3232;
   i->Xin.Sh3232.op  = op;
   i->Xin.Sh3232.amt = amt;
   i->Xin.Sh3232.src = src;
   i->Xin.Sh3232.dst = dst;
   vassert(op == Xsh_SHL || op == Xsh_SHR);
   return i;
}
X86Instr* X86Instr_Push( X86RMI* src ) {
   X86Instr* i     = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag          = Xin_Push;
   i->Xin.Push.src = src;
   return i;
}
X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms,
                          RetLoc rloc ) {
   X86Instr* i          = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag               = Xin_Call;
   i->Xin.Call.cond     = cond;
   i->Xin.Call.target   = target;
   i->Xin.Call.regparms = regparms;
   i->Xin.Call.rloc     = rloc;
   vassert(regparms >= 0 && regparms <= 3);
   vassert(is_sane_RetLoc(rloc));
   return i;
}
X86Instr* X86Instr_XDirect ( Addr32 dstGA, X86AMode* amEIP,
                             X86CondCode cond, Bool toFastEP ) {
   X86Instr* i             = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag                  = Xin_XDirect;
   i->Xin.XDirect.dstGA    = dstGA;
   i->Xin.XDirect.amEIP    = amEIP;
   i->Xin.XDirect.cond     = cond;
   i->Xin.XDirect.toFastEP = toFastEP;
   return i;
}
X86Instr* X86Instr_XIndir ( HReg dstGA, X86AMode* amEIP,
                            X86CondCode cond ) {
   X86Instr* i         = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag              = Xin_XIndir;
   i->Xin.XIndir.dstGA = dstGA;
   i->Xin.XIndir.amEIP = amEIP;
   i->Xin.XIndir.cond  = cond;
   return i;
}
X86Instr* X86Instr_XAssisted ( HReg dstGA, X86AMode* amEIP,
                               X86CondCode cond, IRJumpKind jk ) {
   X86Instr* i            = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag                 = Xin_XAssisted;
   i->Xin.XAssisted.dstGA = dstGA;
   i->Xin.XAssisted.amEIP = amEIP;
   i->Xin.XAssisted.cond  = cond;
   i->Xin.XAssisted.jk    = jk;
   return i;
}
X86Instr* X86Instr_CMov32  ( X86CondCode cond, X86RM* src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag             = Xin_CMov32;
   i->Xin.CMov32.cond = cond;
   i->Xin.CMov32.src  = src;
   i->Xin.CMov32.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
                            X86AMode* src, HReg dst ) {
   X86Instr* i           = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag                = Xin_LoadEX;
   i->Xin.LoadEX.szSmall = szSmall;
   i->Xin.LoadEX.syned   = syned;
   i->Xin.LoadEX.src     = src;
   i->Xin.LoadEX.dst     = dst;
   vassert(szSmall == 1 || szSmall == 2);
   return i;
}
X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) {
   X86Instr* i      = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag           = Xin_Store;
   i->Xin.Store.sz  = sz;
   i->Xin.Store.src = src;
   i->Xin.Store.dst = dst;
   vassert(sz == 1 || sz == 2);
   return i;
}
X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag            = Xin_Set32;
   i->Xin.Set32.cond = cond;
   i->Xin.Set32.dst  = dst;
   return i;
}
X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) {
   X86Instr* i          = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag               = Xin_Bsfr32;
   i->Xin.Bsfr32.isFwds = isFwds;
   i->Xin.Bsfr32.src    = src;
   i->Xin.Bsfr32.dst    = dst;
   return i;
}
X86Instr* X86Instr_MFence ( UInt hwcaps ) {
   X86Instr* i          = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag               = Xin_MFence;
   i->Xin.MFence.hwcaps = hwcaps;
   vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_MMXEXT
                            |VEX_HWCAPS_X86_SSE1
                            |VEX_HWCAPS_X86_SSE2
                            |VEX_HWCAPS_X86_SSE3
                            |VEX_HWCAPS_X86_LZCNT)));
   return i;
}
X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) {
   X86Instr* i      = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag           = Xin_ACAS;
   i->Xin.ACAS.addr = addr;
   i->Xin.ACAS.sz   = sz;
   vassert(sz == 4 || sz == 2 || sz == 1);
   return i;
}
X86Instr* X86Instr_DACAS ( X86AMode* addr ) {
   X86Instr* i       = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag            = Xin_DACAS;
   i->Xin.DACAS.addr = addr;
   return i;
}
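
/* The ACAS/DACAS register conventions above mirror the underlying
   instructions: cmpxchg compares %eax with the memory operand and, if
   they are equal, stores %ebx there; cmpxchg8b likewise compares
   %edx:%eax with the 64-bit operand and stores %ecx:%ebx.  See the
   {..->..} annotations printed by ppX86Instr below. */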

X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag             = Xin_FpUnary;
   i->Xin.FpUnary.op  = op;
   i->Xin.FpUnary.src = src;
   i->Xin.FpUnary.dst = dst;
   return i;
}
X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i          = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag               = Xin_FpBinary;
   i->Xin.FpBinary.op   = op;
   i->Xin.FpBinary.srcL = srcL;
   i->Xin.FpBinary.srcR = srcR;
   i->Xin.FpBinary.dst  = dst;
   return i;
}
X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) {
   X86Instr* i          = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag               = Xin_FpLdSt;
   i->Xin.FpLdSt.isLoad = isLoad;
   i->Xin.FpLdSt.sz     = sz;
   i->Xin.FpLdSt.reg    = reg;
   i->Xin.FpLdSt.addr   = addr;
   vassert(sz == 4 || sz == 8 || sz == 10);
   return i;
}
X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz,
                             HReg reg, X86AMode* addr ) {
   X86Instr* i           = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag                = Xin_FpLdStI;
   i->Xin.FpLdStI.isLoad = isLoad;
   i->Xin.FpLdStI.sz     = sz;
   i->Xin.FpLdStI.reg    = reg;
   i->Xin.FpLdStI.addr   = addr;
   vassert(sz == 2 || sz == 4 || sz == 8);
   return i;
}
X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag              = Xin_Fp64to32;
   i->Xin.Fp64to32.src = src;
   i->Xin.Fp64to32.dst = dst;
   return i;
}
X86Instr* X86Instr_FpCMov ( X86CondCode cond, HReg src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag             = Xin_FpCMov;
   i->Xin.FpCMov.cond = cond;
   i->Xin.FpCMov.src  = src;
   i->Xin.FpCMov.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_FpLdCW ( X86AMode* addr ) {
   X86Instr* i          = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag               = Xin_FpLdCW;
   i->Xin.FpLdCW.addr   = addr;
   return i;
}
X86Instr* X86Instr_FpStSW_AX ( void ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag      = Xin_FpStSW_AX;
   return i;
}
X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag            = Xin_FpCmp;
   i->Xin.FpCmp.srcL = srcL;
   i->Xin.FpCmp.srcR = srcR;
   i->Xin.FpCmp.dst  = dst;
   return i;
}
X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) {
   X86Instr* i            = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag                 = Xin_SseConst;
   i->Xin.SseConst.con    = con;
   i->Xin.SseConst.dst    = dst;
   vassert(hregClass(dst) == HRcVec128);
   return i;
}
X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg reg, X86AMode* addr ) {
   X86Instr* i           = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag                = Xin_SseLdSt;
   i->Xin.SseLdSt.isLoad = isLoad;
   i->Xin.SseLdSt.reg    = reg;
   i->Xin.SseLdSt.addr   = addr;
   return i;
}
X86Instr* X86Instr_SseLdzLO  ( Int sz, HReg reg, X86AMode* addr )
{
   X86Instr* i           = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag                = Xin_SseLdzLO;
   i->Xin.SseLdzLO.sz    = toUChar(sz);
   i->Xin.SseLdzLO.reg   = reg;
   i->Xin.SseLdzLO.addr  = addr;
   vassert(sz == 4 || sz == 8);
   return i;
}
X86Instr* X86Instr_Sse32Fx4 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag              = Xin_Sse32Fx4;
   i->Xin.Sse32Fx4.op  = op;
   i->Xin.Sse32Fx4.src = src;
   i->Xin.Sse32Fx4.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse32FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag              = Xin_Sse32FLo;
   i->Xin.Sse32FLo.op  = op;
   i->Xin.Sse32FLo.src = src;
   i->Xin.Sse32FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse64Fx2 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag              = Xin_Sse64Fx2;
   i->Xin.Sse64Fx2.op  = op;
   i->Xin.Sse64Fx2.src = src;
   i->Xin.Sse64Fx2.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse64FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag              = Xin_Sse64FLo;
   i->Xin.Sse64FLo.op  = op;
   i->Xin.Sse64FLo.src = src;
   i->Xin.Sse64FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_SseReRg ( X86SseOp op, HReg re, HReg rg ) {
   X86Instr* i        = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag             = Xin_SseReRg;
   i->Xin.SseReRg.op  = op;
   i->Xin.SseReRg.src = re;
   i->Xin.SseReRg.dst = rg;
   return i;
}
X86Instr* X86Instr_SseCMov ( X86CondCode cond, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag              = Xin_SseCMov;
   i->Xin.SseCMov.cond = cond;
   i->Xin.SseCMov.src  = src;
   i->Xin.SseCMov.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) {
   X86Instr* i          = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag               = Xin_SseShuf;
   i->Xin.SseShuf.order = order;
   i->Xin.SseShuf.src   = src;
   i->Xin.SseShuf.dst   = dst;
   vassert(order >= 0 && order <= 0xFF);
   return i;
}
X86Instr* X86Instr_EvCheck ( X86AMode* amCounter,
                             X86AMode* amFailAddr ) {
   X86Instr* i               = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag                    = Xin_EvCheck;
   i->Xin.EvCheck.amCounter  = amCounter;
   i->Xin.EvCheck.amFailAddr = amFailAddr;
   return i;
}
X86Instr* X86Instr_ProfInc ( void ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag      = Xin_ProfInc;
   return i;
}

void ppX86Instr ( const X86Instr* i, Bool mode64 ) {
   vassert(mode64 == False);
   switch (i->tag) {
      case Xin_Alu32R:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32R.op));
         ppX86RMI(i->Xin.Alu32R.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32M.op));
         ppX86RI(i->Xin.Alu32M.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         vex_printf("%sl ", showX86ShiftOp(i->Xin.Sh32.op));
         if (i->Xin.Sh32.src == 0)
            vex_printf("%%cl,");
         else
            vex_printf("$%d,", (Int)i->Xin.Sh32.src);
         ppHRegX86(i->Xin.Sh32.dst);
         return;
      case Xin_Test32:
         vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32);
         ppX86RM(i->Xin.Test32.dst);
         return;
      case Xin_Unary32:
         vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op));
         ppHRegX86(i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         vex_printf("leal ");
         ppX86AMode(i->Xin.Lea32.am);
         vex_printf(",");
         ppHRegX86(i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         vex_printf("%cmull ", i->Xin.MulL.syned ? 's' : 'u');
         ppX86RM(i->Xin.MulL.src);
         return;
      case Xin_Div:
         vex_printf("%cdivl ", i->Xin.Div.syned ? 's' : 'u');
         ppX86RM(i->Xin.Div.src);
         return;
      case Xin_Sh3232:
         vex_printf("%sdl ", showX86ShiftOp(i->Xin.Sh3232.op));
         if (i->Xin.Sh3232.amt == 0)
            vex_printf(" %%cl,");
         else
            vex_printf(" $%d,", (Int)i->Xin.Sh3232.amt);
         ppHRegX86(i->Xin.Sh3232.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sh3232.dst);
         return;
      case Xin_Push:
         vex_printf("pushl ");
         ppX86RMI(i->Xin.Push.src);
         return;
      case Xin_Call:
         vex_printf("call%s[%d,",
                    i->Xin.Call.cond==Xcc_ALWAYS
                       ? "" : showX86CondCode(i->Xin.Call.cond),
                    i->Xin.Call.regparms);
         ppRetLoc(i->Xin.Call.rloc);
         vex_printf("] 0x%x", i->Xin.Call.target);
         break;
      case Xin_XDirect:
         vex_printf("(xDirect) ");
         vex_printf("if (%%eflags.%s) { ",
                    showX86CondCode(i->Xin.XDirect.cond));
         vex_printf("movl $0x%x,", i->Xin.XDirect.dstGA);
         ppX86AMode(i->Xin.XDirect.amEIP);
         vex_printf("; ");
         vex_printf("movl $disp_cp_chain_me_to_%sEP,%%edx; call *%%edx }",
                    i->Xin.XDirect.toFastEP ? "fast" : "slow");
         return;
      case Xin_XIndir:
         vex_printf("(xIndir) ");
         vex_printf("if (%%eflags.%s) { movl ",
                    showX86CondCode(i->Xin.XIndir.cond));
         ppHRegX86(i->Xin.XIndir.dstGA);
         vex_printf(",");
         ppX86AMode(i->Xin.XIndir.amEIP);
         vex_printf("; movl $disp_indir,%%edx; jmp *%%edx }");
         return;
      case Xin_XAssisted:
         vex_printf("(xAssisted) ");
         vex_printf("if (%%eflags.%s) { ",
                    showX86CondCode(i->Xin.XAssisted.cond));
         vex_printf("movl ");
         ppHRegX86(i->Xin.XAssisted.dstGA);
         vex_printf(",");
         ppX86AMode(i->Xin.XAssisted.amEIP);
         vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%ebp",
                    (Int)i->Xin.XAssisted.jk);
         vex_printf("; movl $disp_assisted,%%edx; jmp *%%edx }");
         return;
      case Xin_CMov32:
         vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond));
         ppX86RM(i->Xin.CMov32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         vex_printf("mov%c%cl ",
                    i->Xin.LoadEX.syned ? 's' : 'z',
                    i->Xin.LoadEX.szSmall==1 ? 'b' : 'w');
         ppX86AMode(i->Xin.LoadEX.src);
         vex_printf(",");
         ppHRegX86(i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         vex_printf("mov%c ", i->Xin.Store.sz==1 ? 'b' : 'w');
         ppHRegX86(i->Xin.Store.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Store.dst);
         return;
      case Xin_Set32:
         vex_printf("setl%s ", showX86CondCode(i->Xin.Set32.cond));
         ppHRegX86(i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r');
         ppHRegX86(i->Xin.Bsfr32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         vex_printf("mfence(%s)",
                    LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps));
         return;
      case Xin_ACAS:
         vex_printf("lock cmpxchg%c ",
                     i->Xin.ACAS.sz==1 ? 'b'
                                       : i->Xin.ACAS.sz==2 ? 'w' : 'l');
         vex_printf("{%%eax->%%ebx},");
         ppX86AMode(i->Xin.ACAS.addr);
         return;
      case Xin_DACAS:
         vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},");
         ppX86AMode(i->Xin.DACAS.addr);
         return;
      case Xin_FpUnary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op));
         ppHRegX86(i->Xin.FpUnary.src);
         vex_printf(",");
         ppHRegX86(i->Xin.FpUnary.dst);
         break;
      case Xin_FpBinary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpBinary.op));
         ppHRegX86(i->Xin.FpBinary.srcL);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.srcR);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.dst);
         break;
      case Xin_FpLdSt:
         if (i->Xin.FpLdSt.isLoad) {
            vex_printf("gld%c " ,  i->Xin.FpLdSt.sz==10 ? 'T'
                                   : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppX86AMode(i->Xin.FpLdSt.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdSt.reg);
         } else {
            vex_printf("gst%c " , i->Xin.FpLdSt.sz==10 ? 'T'
                                  : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppHRegX86(i->Xin.FpLdSt.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdSt.addr);
         }
         return;
      case Xin_FpLdStI:
         if (i->Xin.FpLdStI.isLoad) {
            vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                  i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppX86AMode(i->Xin.FpLdStI.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdStI.reg);
         } else {
            vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                  i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppHRegX86(i->Xin.FpLdStI.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdStI.addr);
         }
         return;
      case Xin_Fp64to32:
         vex_printf("gdtof ");
         ppHRegX86(i->Xin.Fp64to32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         vex_printf("gcmov%s ", showX86CondCode(i->Xin.FpCMov.cond));
         ppHRegX86(i->Xin.FpCMov.src);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         vex_printf("fldcw ");
         ppX86AMode(i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         vex_printf("fstsw %%ax");
         return;
      case Xin_FpCmp:
         vex_printf("gcmp ");
         ppHRegX86(i->Xin.FpCmp.srcL);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCmp.srcR);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCmp.dst);
         break;
      case Xin_SseConst:
         vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
         ppHRegX86(i->Xin.SseConst.dst);
         break;
      case Xin_SseLdSt:
         vex_printf("movups ");
         if (i->Xin.SseLdSt.isLoad) {
            ppX86AMode(i->Xin.SseLdSt.addr);
            vex_printf(",");
            ppHRegX86(i->Xin.SseLdSt.reg);
         } else {
            ppHRegX86(i->Xin.SseLdSt.reg);
            vex_printf(",");
            ppX86AMode(i->Xin.SseLdSt.addr);
         }
         return;
      case Xin_SseLdzLO:
         vex_printf("movs%s ", i->Xin.SseLdzLO.sz==4 ? "s" : "d");
         ppX86AMode(i->Xin.SseLdzLO.addr);
         vex_printf(",");
         ppHRegX86(i->Xin.SseLdzLO.reg);
         return;
      case Xin_Sse32Fx4:
         vex_printf("%sps ", showX86SseOp(i->Xin.Sse32Fx4.op));
         ppHRegX86(i->Xin.Sse32Fx4.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         vex_printf("%sss ", showX86SseOp(i->Xin.Sse32FLo.op));
         ppHRegX86(i->Xin.Sse32FLo.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         vex_printf("%spd ", showX86SseOp(i->Xin.Sse64Fx2.op));
         ppHRegX86(i->Xin.Sse64Fx2.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         vex_printf("%ssd ", showX86SseOp(i->Xin.Sse64FLo.op));
         ppHRegX86(i->Xin.Sse64FLo.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         vex_printf("%s ", showX86SseOp(i->Xin.SseReRg.op));
         ppHRegX86(i->Xin.SseReRg.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseReRg.dst);
         return;
      case Xin_SseCMov:
         vex_printf("cmov%s ", showX86CondCode(i->Xin.SseCMov.cond));
         ppHRegX86(i->Xin.SseCMov.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         vex_printf("pshufd $0x%x,", (UInt)i->Xin.SseShuf.order);
         ppHRegX86(i->Xin.SseShuf.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseShuf.dst);
         return;
      case Xin_EvCheck:
         vex_printf("(evCheck) decl ");
         ppX86AMode(i->Xin.EvCheck.amCounter);
         vex_printf("; jns nofail; jmp *");
         ppX86AMode(i->Xin.EvCheck.amFailAddr);
         vex_printf("; nofail:");
         return;
      case Xin_ProfInc:
         vex_printf("(profInc) addl $1,NotKnownYet; "
                    "adcl $0,NotKnownYet+4");
         return;
      default:
         vpanic("ppX86Instr");
   }
}

/* --------- Helpers for register allocation. --------- */

void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64)
{
   Bool unary;
   vassert(mode64 == False);
   initHRegUsage(u);
   switch (i->tag) {
      case Xin_Alu32R:
         addRegUsage_X86RMI(u, i->Xin.Alu32R.src);
         if (i->Xin.Alu32R.op == Xalu_MOV) {
            addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst);

            if (i->Xin.Alu32R.src->tag == Xrmi_Reg) {
               u->isRegRegMove = True;
               u->regMoveSrc   = i->Xin.Alu32R.src->Xrmi.Reg.reg;
               u->regMoveDst   = i->Xin.Alu32R.dst;
            }
            return;
         }
         if (i->Xin.Alu32R.op == Xalu_CMP) {
            addHRegUse(u, HRmRead, i->Xin.Alu32R.dst);
            return;
         }
         addHRegUse(u, HRmModify, i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         addRegUsage_X86RI(u, i->Xin.Alu32M.src);
         addRegUsage_X86AMode(u, i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         addHRegUse(u, HRmModify, i->Xin.Sh32.dst);
         if (i->Xin.Sh32.src == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Test32:
         addRegUsage_X86RM(u, i->Xin.Test32.dst, HRmRead);
         return;
      case Xin_Unary32:
         addHRegUse(u, HRmModify, i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         addRegUsage_X86AMode(u, i->Xin.Lea32.am);
         addHRegUse(u, HRmWrite, i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         addRegUsage_X86RM(u, i->Xin.MulL.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         return;
      case Xin_Div:
         addRegUsage_X86RM(u, i->Xin.Div.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         return;
      case Xin_Sh3232:
         addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
         addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
         if (i->Xin.Sh3232.amt == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Push:
         addRegUsage_X86RMI(u, i->Xin.Push.src);
         addHRegUse(u, HRmModify, hregX86_ESP());
         return;
      case Xin_Call:
         /* This is a bit subtle. */
         /* First off, claim it trashes all the caller-saved regs
            which fall within the register allocator's jurisdiction.
            These I believe to be %eax %ecx %edx and all the xmm
            registers. */
         addHRegUse(u, HRmWrite, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_ECX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         addHRegUse(u, HRmWrite, hregX86_XMM0());
         addHRegUse(u, HRmWrite, hregX86_XMM1());
         addHRegUse(u, HRmWrite, hregX86_XMM2());
         addHRegUse(u, HRmWrite, hregX86_XMM3());
         addHRegUse(u, HRmWrite, hregX86_XMM4());
         addHRegUse(u, HRmWrite, hregX86_XMM5());
         addHRegUse(u, HRmWrite, hregX86_XMM6());
         addHRegUse(u, HRmWrite, hregX86_XMM7());
         /* Now we have to state any parameter-carrying registers
            which might be read.  This depends on the regparmness. */
         switch (i->Xin.Call.regparms) {
            case 3: addHRegUse(u, HRmRead, hregX86_ECX()); /*fallthru*/
            case 2: addHRegUse(u, HRmRead, hregX86_EDX()); /*fallthru*/
            case 1: addHRegUse(u, HRmRead, hregX86_EAX()); break;
            case 0: break;
            default: vpanic("getRegUsage_X86Instr:Call:regparms");
         }
         /* Finally, there is the issue that the insn trashes a
            register because the literal target address has to be
            loaded into a register.  Fortunately, for the 0/1/2
            regparm case, we can use EAX, EDX and ECX respectively, so
            this does not cause any further damage.  For the 3-regparm
            case, we'll have to choose another register arbitrarily --
            since A, D and C are used for parameters -- and so we might
            as well choose EDI. */
         if (i->Xin.Call.regparms == 3)
            addHRegUse(u, HRmWrite, hregX86_EDI());
         /* Upshot of this is that the assembler really must observe
            the here-stated convention of which register to use as an
            address temporary, depending on the regparmness: 0==EAX,
            1==EDX, 2==ECX, 3==EDI. */
         return;
      /* XDirect/XIndir/XAssisted are also a bit subtle.  They
         conditionally exit the block.  Hence we only need to list (1)
         the registers that they read, and (2) the registers that they
         write in the case where the block is not exited.  (2) is
         empty, hence only (1) is relevant here. */
      case Xin_XDirect:
         addRegUsage_X86AMode(u, i->Xin.XDirect.amEIP);
         return;
      case Xin_XIndir:
         addHRegUse(u, HRmRead, i->Xin.XIndir.dstGA);
         addRegUsage_X86AMode(u, i->Xin.XIndir.amEIP);
         return;
      case Xin_XAssisted:
         addHRegUse(u, HRmRead, i->Xin.XAssisted.dstGA);
         addRegUsage_X86AMode(u, i->Xin.XAssisted.amEIP);
         return;
      case Xin_CMov32:
         addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead);
         addHRegUse(u, HRmModify, i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         addRegUsage_X86AMode(u, i->Xin.LoadEX.src);
         addHRegUse(u, HRmWrite, i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         addHRegUse(u, HRmRead, i->Xin.Store.src);
         addRegUsage_X86AMode(u, i->Xin.Store.dst);
         return;
      case Xin_Set32:
         addHRegUse(u, HRmWrite, i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         addHRegUse(u, HRmRead, i->Xin.Bsfr32.src);
         addHRegUse(u, HRmWrite, i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         return;
      case Xin_ACAS:
         addRegUsage_X86AMode(u, i->Xin.ACAS.addr);
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_DACAS:
         addRegUsage_X86AMode(u, i->Xin.DACAS.addr);
         addHRegUse(u, HRmRead, hregX86_ECX());
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_FpUnary:
         addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
         addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);

         if (i->Xin.FpUnary.op == Xfp_MOV) {
            u->isRegRegMove = True;
            u->regMoveSrc   = i->Xin.FpUnary.src;
            u->regMoveDst   = i->Xin.FpUnary.dst;
         }
         return;
      case Xin_FpBinary:
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
         return;
      case Xin_FpLdSt:
         addRegUsage_X86AMode(u, i->Xin.FpLdSt.addr);
         addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
                       i->Xin.FpLdSt.reg);
         return;
      case Xin_FpLdStI:
         addRegUsage_X86AMode(u, i->Xin.FpLdStI.addr);
         addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
                       i->Xin.FpLdStI.reg);
         return;
      case Xin_Fp64to32:
         addHRegUse(u, HRmRead,  i->Xin.Fp64to32.src);
         addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         addHRegUse(u, HRmRead,   i->Xin.FpCMov.src);
         addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         addRegUsage_X86AMode(u, i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_FpCmp:
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpCmp.dst);
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_SseLdSt:
         addRegUsage_X86AMode(u, i->Xin.SseLdSt.addr);
         addHRegUse(u, i->Xin.SseLdSt.isLoad ? HRmWrite : HRmRead,
                       i->Xin.SseLdSt.reg);
         return;
      case Xin_SseLdzLO:
         addRegUsage_X86AMode(u, i->Xin.SseLdzLO.addr);
         addHRegUse(u, HRmWrite, i->Xin.SseLdzLO.reg);
         return;
      case Xin_SseConst:
         addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
         return;
      case Xin_Sse32Fx4:
         vassert(i->Xin.Sse32Fx4.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse32Fx4.op == Xsse_RCPF
                         || i->Xin.Sse32Fx4.op == Xsse_RSQRTF
                         || i->Xin.Sse32Fx4.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32Fx4.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         vassert(i->Xin.Sse32FLo.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse32FLo.op == Xsse_RCPF
                         || i->Xin.Sse32FLo.op == Xsse_RSQRTF
                         || i->Xin.Sse32FLo.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         vassert(i->Xin.Sse64Fx2.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse64Fx2.op == Xsse_RCPF
                         || i->Xin.Sse64Fx2.op == Xsse_RSQRTF
                         || i->Xin.Sse64Fx2.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse64Fx2.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         vassert(i->Xin.Sse64FLo.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse64FLo.op == Xsse_RCPF
                         || i->Xin.Sse64FLo.op == Xsse_RSQRTF
                         || i->Xin.Sse64FLo.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse64FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         if (i->Xin.SseReRg.op == Xsse_XOR
             && sameHReg(i->Xin.SseReRg.src, i->Xin.SseReRg.dst)) {
            /* reg-alloc needs to understand 'xor r,r' as a write of r */
            /* (as opposed to a rite of passage :-) */
            addHRegUse(u, HRmWrite, i->Xin.SseReRg.dst);
         } else {
            addHRegUse(u, HRmRead, i->Xin.SseReRg.src);
            addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV
                             ? HRmWrite : HRmModify,
                          i->Xin.SseReRg.dst);

            if (i->Xin.SseReRg.op == Xsse_MOV) {
               u->isRegRegMove = True;
               u->regMoveSrc   = i->Xin.SseReRg.src;
               u->regMoveDst   = i->Xin.SseReRg.dst;
            }
         }
         return;
      case Xin_SseCMov:
         addHRegUse(u, HRmRead,   i->Xin.SseCMov.src);
         addHRegUse(u, HRmModify, i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
1497          addHRegUse(u, HRmRead,  i->Xin.SseShuf.src);
1498          addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst);
1499          return;
1500       case Xin_EvCheck:
1501          /* We expect both amodes only to mention %ebp, so this is in
1502             fact pointless, since %ebp isn't allocatable, but anyway.. */
1503          addRegUsage_X86AMode(u, i->Xin.EvCheck.amCounter);
1504          addRegUsage_X86AMode(u, i->Xin.EvCheck.amFailAddr);
1505          return;
1506       case Xin_ProfInc:
1507          /* does not use any registers. */
1508          return;
1509       default:
1510          ppX86Instr(i, False);
1511          vpanic("getRegUsage_X86Instr");
1512    }
1513 }

/* local helper */
static void mapReg ( HRegRemap* m, HReg* r )
{
   *r = lookupHRegRemap(m, *r);
}

void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 )
{
   vassert(mode64 == False);
   switch (i->tag) {
      case Xin_Alu32R:
         mapRegs_X86RMI(m, i->Xin.Alu32R.src);
         mapReg(m, &i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         mapRegs_X86RI(m, i->Xin.Alu32M.src);
         mapRegs_X86AMode(m, i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         mapReg(m, &i->Xin.Sh32.dst);
         return;
      case Xin_Test32:
         mapRegs_X86RM(m, i->Xin.Test32.dst);
         return;
      case Xin_Unary32:
         mapReg(m, &i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         mapRegs_X86AMode(m, i->Xin.Lea32.am);
         mapReg(m, &i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         mapRegs_X86RM(m, i->Xin.MulL.src);
         return;
      case Xin_Div:
         mapRegs_X86RM(m, i->Xin.Div.src);
         return;
      case Xin_Sh3232:
         mapReg(m, &i->Xin.Sh3232.src);
         mapReg(m, &i->Xin.Sh3232.dst);
         return;
      case Xin_Push:
         mapRegs_X86RMI(m, i->Xin.Push.src);
         return;
      case Xin_Call:
         return;
      case Xin_XDirect:
         mapRegs_X86AMode(m, i->Xin.XDirect.amEIP);
         return;
      case Xin_XIndir:
         mapReg(m, &i->Xin.XIndir.dstGA);
         mapRegs_X86AMode(m, i->Xin.XIndir.amEIP);
         return;
      case Xin_XAssisted:
         mapReg(m, &i->Xin.XAssisted.dstGA);
         mapRegs_X86AMode(m, i->Xin.XAssisted.amEIP);
         return;
      case Xin_CMov32:
         mapRegs_X86RM(m, i->Xin.CMov32.src);
         mapReg(m, &i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         mapRegs_X86AMode(m, i->Xin.LoadEX.src);
         mapReg(m, &i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         mapReg(m, &i->Xin.Store.src);
         mapRegs_X86AMode(m, i->Xin.Store.dst);
         return;
      case Xin_Set32:
         mapReg(m, &i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         mapReg(m, &i->Xin.Bsfr32.src);
         mapReg(m, &i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         return;
      case Xin_ACAS:
         mapRegs_X86AMode(m, i->Xin.ACAS.addr);
         return;
      case Xin_DACAS:
         mapRegs_X86AMode(m, i->Xin.DACAS.addr);
         return;
      case Xin_FpUnary:
         mapReg(m, &i->Xin.FpUnary.src);
         mapReg(m, &i->Xin.FpUnary.dst);
         return;
      case Xin_FpBinary:
         mapReg(m, &i->Xin.FpBinary.srcL);
         mapReg(m, &i->Xin.FpBinary.srcR);
         mapReg(m, &i->Xin.FpBinary.dst);
         return;
      case Xin_FpLdSt:
         mapRegs_X86AMode(m, i->Xin.FpLdSt.addr);
         mapReg(m, &i->Xin.FpLdSt.reg);
         return;
      case Xin_FpLdStI:
         mapRegs_X86AMode(m, i->Xin.FpLdStI.addr);
         mapReg(m, &i->Xin.FpLdStI.reg);
         return;
      case Xin_Fp64to32:
         mapReg(m, &i->Xin.Fp64to32.src);
         mapReg(m, &i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         mapReg(m, &i->Xin.FpCMov.src);
         mapReg(m, &i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         mapRegs_X86AMode(m, i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         return;
      case Xin_FpCmp:
         mapReg(m, &i->Xin.FpCmp.srcL);
         mapReg(m, &i->Xin.FpCmp.srcR);
         mapReg(m, &i->Xin.FpCmp.dst);
         return;
      case Xin_SseConst:
         mapReg(m, &i->Xin.SseConst.dst);
         return;
      case Xin_SseLdSt:
         mapReg(m, &i->Xin.SseLdSt.reg);
         mapRegs_X86AMode(m, i->Xin.SseLdSt.addr);
         break;
      case Xin_SseLdzLO:
         mapReg(m, &i->Xin.SseLdzLO.reg);
         mapRegs_X86AMode(m, i->Xin.SseLdzLO.addr);
         break;
      case Xin_Sse32Fx4:
         mapReg(m, &i->Xin.Sse32Fx4.src);
         mapReg(m, &i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         mapReg(m, &i->Xin.Sse32FLo.src);
         mapReg(m, &i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         mapReg(m, &i->Xin.Sse64Fx2.src);
         mapReg(m, &i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         mapReg(m, &i->Xin.Sse64FLo.src);
         mapReg(m, &i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         mapReg(m, &i->Xin.SseReRg.src);
         mapReg(m, &i->Xin.SseReRg.dst);
         return;
      case Xin_SseCMov:
         mapReg(m, &i->Xin.SseCMov.src);
         mapReg(m, &i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         mapReg(m, &i->Xin.SseShuf.src);
         mapReg(m, &i->Xin.SseShuf.dst);
         return;
      case Xin_EvCheck:
         /* We expect both amodes only to mention %ebp, so this is in
            fact pointless, since %ebp isn't allocatable, but anyway.. */
         mapRegs_X86AMode(m, i->Xin.EvCheck.amCounter);
         mapRegs_X86AMode(m, i->Xin.EvCheck.amFailAddr);
         return;
      case Xin_ProfInc:
         /* does not use any registers. */
         return;

      default:
         ppX86Instr(i, mode64);
         vpanic("mapRegs_X86Instr");
   }
}

/* Generate x86 spill/reload instructions under the direction of the
   register allocator.  Note it's critical these don't write the
   condition codes. */
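/* Each spill or reload is a single instruction here, so only *i1 is
   filled in; *i2 is left as NULL. */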

void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                    HReg rreg, Int offsetB, Bool mode64 )
{
   X86AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == False);
   *i1 = *i2 = NULL;
   am = X86AMode_IR(offsetB, hregX86_EBP());
   switch (hregClass(rreg)) {
      case HRcInt32:
         *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am );
         return;
      case HRcFlt64:
         *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am );
         return;
      case HRcVec128:
         *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genSpill_X86: unimplemented regclass");
   }
}

void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                     HReg rreg, Int offsetB, Bool mode64 )
{
   X86AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == False);
   *i1 = *i2 = NULL;
   am = X86AMode_IR(offsetB, hregX86_EBP());
   switch (hregClass(rreg)) {
      case HRcInt32:
         *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg );
         return;
      case HRcFlt64:
         *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am );
         return;
      case HRcVec128:
         *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genReload_X86: unimplemented regclass");
   }
}

X86Instr* genMove_X86(HReg from, HReg to, Bool mode64)
{
   switch (hregClass(from)) {
   case HRcInt32:
      return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(from), to);
   case HRcVec128:
      return X86Instr_SseReRg(Xsse_MOV, from, to);
   default:
      ppHRegClass(hregClass(from));
      vpanic("genMove_X86: unimplemented regclass");
   }
}

/* The given instruction reads the specified vreg exactly once, and
   that vreg is currently located at the given spill offset.  If
   possible, return a variant of the instruction which instead
   references the spill slot directly. */
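/* e.g. "orl %vreg, %dst" with %vreg spilled can become
   "orl <spill_off>(%ebp), %dst". */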

X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off )
{
   vassert(spill_off >= 0 && spill_off < 10000); /* let's say */

   /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
      Convert to: src=RMI_Mem, dst=Reg
   */
   if (i->tag == Xin_Alu32R
       && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR
           || i->Xin.Alu32R.op == Xalu_XOR)
       && i->Xin.Alu32R.src->tag == Xrmi_Reg
       && sameHReg(i->Xin.Alu32R.src->Xrmi.Reg.reg, vreg)) {
      vassert(! sameHReg(i->Xin.Alu32R.dst, vreg));
      return X86Instr_Alu32R(
                i->Xin.Alu32R.op,
                X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())),
                i->Xin.Alu32R.dst
             );
   }

   /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
      Convert to: src=RI_Imm, dst=Mem
   */
   if (i->tag == Xin_Alu32R
       && (i->Xin.Alu32R.op == Xalu_CMP)
       && i->Xin.Alu32R.src->tag == Xrmi_Imm
       && sameHReg(i->Xin.Alu32R.dst, vreg)) {
      return X86Instr_Alu32M(
                i->Xin.Alu32R.op,
                X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ),
                X86AMode_IR( spill_off, hregX86_EBP())
             );
   }

   /* Deal with form: Push(RMI_Reg)
      Convert to: Push(RMI_Mem)
   */
   if (i->tag == Xin_Push
       && i->Xin.Push.src->tag == Xrmi_Reg
       && sameHReg(i->Xin.Push.src->Xrmi.Reg.reg, vreg)) {
      return X86Instr_Push(
                X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP()))
             );
   }

   /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src
      Convert to CMov32(RM_Mem, dst) */
   if (i->tag == Xin_CMov32
       && i->Xin.CMov32.src->tag == Xrm_Reg
       && sameHReg(i->Xin.CMov32.src->Xrm.Reg.reg, vreg)) {
      vassert(! sameHReg(i->Xin.CMov32.dst, vreg));
      return X86Instr_CMov32(
                i->Xin.CMov32.cond,
                X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )),
                i->Xin.CMov32.dst
             );
   }

   /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */
   if (i->tag == Xin_Test32
       && i->Xin.Test32.dst->tag == Xrm_Reg
       && sameHReg(i->Xin.Test32.dst->Xrm.Reg.reg, vreg)) {
      return X86Instr_Test32(
                i->Xin.Test32.imm32,
                X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) )
             );
   }

   return NULL;
}


/* --------- The x86 assembler (bleh.) --------- */

inline static UInt iregEnc ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcInt32);
   vassert(!hregIsVirtual(r));
   n = hregEncoding(r);
   vassert(n <= 7);
   return n;
}

inline static UInt fregEnc ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcFlt64);
   vassert(!hregIsVirtual(r));
   n = hregEncoding(r);
   vassert(n <= 5);
   return n;
}

inline static UInt vregEnc ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcVec128);
   vassert(!hregIsVirtual(r));
   n = hregEncoding(r);
   vassert(n <= 7);
   return n;
}

inline static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
{
   vassert(mod < 4);
   vassert((reg|regmem) < 8);
   return (UChar)( ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7) );
}

inline static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
{
   vassert(shift < 4);
   vassert((regindex|regbase) < 8);
   return (UChar)( ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7) );
}

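/* Emit a 32-bit word, least significant byte first (x86 is
   little-endian). */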
static UChar* emit32 ( UChar* p, UInt w32 )
{
   *p++ = toUChar( w32        & 0x000000FF);
   *p++ = toUChar((w32 >>  8) & 0x000000FF);
   *p++ = toUChar((w32 >> 16) & 0x000000FF);
   *p++ = toUChar((w32 >> 24) & 0x000000FF);
   return p;
}

/* Does a sign-extend of the lowest 8 bits give
   the original number? */
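/* e.g. fits8bits(0x7F) and fits8bits(0xFFFFFF80) hold, but
   fits8bits(0x80) does not, since 0x80 sign-extends to 0xFFFFFF80. */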
static Bool fits8bits ( UInt w32 )
{
   Int i32 = (Int)w32;
   return toBool(i32 == ((Int)(w32 << 24) >> 24));
}


/* Forming mod-reg-rm bytes and scale-index-base bytes.

     greg,  0(ereg)    |  ereg != ESP && ereg != EBP
                       =  00 greg ereg

     greg,  d8(ereg)   |  ereg != ESP
                       =  01 greg ereg, d8

     greg,  d32(ereg)  |  ereg != ESP
                       =  10 greg ereg, d32

     greg,  d8(%esp)   =  01 greg 100, 0x24, d8

     -----------------------------------------------

     greg,  d8(base,index,scale)
               |  index != ESP
               =  01 greg 100, scale index base, d8

     greg,  d32(base,index,scale)
               |  index != ESP
               =  10 greg 100, scale index base, d32
*/
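/* Worked example: for "movl %eax, 8(%esi)", greg = %eax (enc 0) and
   the amode is 8(%esi) (%esi enc 6, displacement fits 8 bits), so the
   second rule applies: mod=01 reg=000 rm=110 then the d8, i.e. the
   bytes 46 08 following the 0x89 opcode byte. */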
static UChar* doAMode_M__wrk ( UChar* p, UInt gregEnc, X86AMode* am )
{
   if (am->tag == Xam_IR) {
      if (am->Xam.IR.imm == 0
          && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())
          && ! sameHReg(am->Xam.IR.reg, hregX86_EBP()) ) {
         *p++ = mkModRegRM(0, gregEnc, iregEnc(am->Xam.IR.reg));
         return p;
      }
      if (fits8bits(am->Xam.IR.imm)
          && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())) {
         *p++ = mkModRegRM(1, gregEnc, iregEnc(am->Xam.IR.reg));
         *p++ = toUChar(am->Xam.IR.imm & 0xFF);
         return p;
      }
      if (! sameHReg(am->Xam.IR.reg, hregX86_ESP())) {
         *p++ = mkModRegRM(2, gregEnc, iregEnc(am->Xam.IR.reg));
         p = emit32(p, am->Xam.IR.imm);
         return p;
      }
      if (sameHReg(am->Xam.IR.reg, hregX86_ESP())
          && fits8bits(am->Xam.IR.imm)) {
         *p++ = mkModRegRM(1, gregEnc, 4);
         *p++ = 0x24;
         *p++ = toUChar(am->Xam.IR.imm & 0xFF);
         return p;
      }
      ppX86AMode(am);
      vpanic("doAMode_M: can't emit amode IR");
      /*NOTREACHED*/
   }
   if (am->tag == Xam_IRRS) {
      if (fits8bits(am->Xam.IRRS.imm)
          && ! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) {
         *p++ = mkModRegRM(1, gregEnc, 4);
         *p++ = mkSIB(am->Xam.IRRS.shift, iregEnc(am->Xam.IRRS.index),
                                          iregEnc(am->Xam.IRRS.base));
         *p++ = toUChar(am->Xam.IRRS.imm & 0xFF);
         return p;
      }
      if (! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) {
         *p++ = mkModRegRM(2, gregEnc, 4);
         *p++ = mkSIB(am->Xam.IRRS.shift, iregEnc(am->Xam.IRRS.index),
                                          iregEnc(am->Xam.IRRS.base));
         p = emit32(p, am->Xam.IRRS.imm);
         return p;
      }
      ppX86AMode(am);
      vpanic("doAMode_M: can't emit amode IRRS");
      /*NOTREACHED*/
   }
   vpanic("doAMode_M: unknown amode");
   /*NOTREACHED*/
}

static UChar* doAMode_M ( UChar* p, HReg greg, X86AMode* am )
{
   return doAMode_M__wrk(p, iregEnc(greg), am);
}

static UChar* doAMode_M_enc ( UChar* p, UInt gregEnc, X86AMode* am )
{
   vassert(gregEnc < 8);
   return doAMode_M__wrk(p, gregEnc, am);
}


/* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
inline static UChar* doAMode_R__wrk ( UChar* p, UInt gregEnc, UInt eregEnc )
{
   *p++ = mkModRegRM(3, gregEnc, eregEnc);
   return p;
}

static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
{
   return doAMode_R__wrk(p, iregEnc(greg), iregEnc(ereg));
}

static UChar* doAMode_R_enc_reg ( UChar* p, UInt gregEnc, HReg ereg )
{
   vassert(gregEnc < 8);
   return doAMode_R__wrk(p, gregEnc, iregEnc(ereg));
}

static UChar* doAMode_R_enc_enc ( UChar* p, UInt gregEnc, UInt eregEnc )
{
   vassert( (gregEnc|eregEnc) < 8);
   return doAMode_R__wrk(p, gregEnc, eregEnc);
}


/* Emit ffree %st(7) */
static UChar* do_ffree_st7 ( UChar* p )
{
   *p++ = 0xDD;
   *p++ = 0xC7;
   return p;
}

/* Emit fstp %st(i), 1 <= i <= 7 */
static UChar* do_fstp_st ( UChar* p, Int i )
{
   vassert(1 <= i && i <= 7);
   *p++ = 0xDD;
   *p++ = toUChar(0xD8+i);
   return p;
}

/* Emit fld %st(i), 0 <= i <= 6 */
static UChar* do_fld_st ( UChar* p, Int i )
{
   vassert(0 <= i && i <= 6);
   *p++ = 0xD9;
   *p++ = toUChar(0xC0+i);
   return p;
}

/* Emit f<op> %st(0) */
static UChar* do_fop1_st ( UChar* p, X86FpOp op )
{
   switch (op) {
      case Xfp_NEG:    *p++ = 0xD9; *p++ = 0xE0; break;
      case Xfp_ABS:    *p++ = 0xD9; *p++ = 0xE1; break;
      case Xfp_SQRT:   *p++ = 0xD9; *p++ = 0xFA; break;
      case Xfp_ROUND:  *p++ = 0xD9; *p++ = 0xFC; break;
      case Xfp_SIN:    *p++ = 0xD9; *p++ = 0xFE; break;
      case Xfp_COS:    *p++ = 0xD9; *p++ = 0xFF; break;
      case Xfp_2XM1:   *p++ = 0xD9; *p++ = 0xF0; break;
      case Xfp_MOV:    break;
      case Xfp_TAN:
         /* fptan pushes 1.0 on the FP stack, except when the argument
            is out of range.  Hence we have to do the instruction,
            then inspect C2 to see if there is an out of range
            condition.  If there is, we skip the fincstp that is used
            by the in-range case to get rid of this extra 1.0
            value. */
         p = do_ffree_st7(p); /* since fptan sometimes pushes 1.0 */
         *p++ = 0xD9; *p++ = 0xF2; // fptan
         *p++ = 0x50;              // pushl %eax
         *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax
         *p++ = 0x66; *p++ = 0xA9;
         *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax
         *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp
         *p++ = 0xD9; *p++ = 0xF7; // fincstp
         *p++ = 0x58;              // after_fincstp: popl %eax
         break;
      default:
         vpanic("do_fop1_st: unknown op");
   }
   return p;
}

/* Emit f<op> %st(i), 1 <= i <= 5 */
static UChar* do_fop2_st ( UChar* p, X86FpOp op, Int i )
{
   Int subopc;
   switch (op) {
      case Xfp_ADD: subopc = 0; break;
      case Xfp_SUB: subopc = 4; break;
      case Xfp_MUL: subopc = 1; break;
      case Xfp_DIV: subopc = 6; break;
      default: vpanic("do_fop2_st: unknown op");
   }
   *p++ = 0xD8;
   p    = doAMode_R_enc_enc(p, subopc, i);
   return p;
}

/* Push a 32-bit word on the stack.  The word depends on tags[3:0];
   each byte is either 0x00 or 0xFF depending on the corresponding
   bit in tags[]. */
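/* e.g. tags == 0x5 selects bytes 0 and 2, giving the word
   0x00FF00FF. */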
static UChar* push_word_from_tags ( UChar* p, UShort tags )
{
   UInt w;
   vassert(0 == (tags & ~0xF));
   if (tags == 0) {
      /* pushl $0x00000000 */
      *p++ = 0x6A;
      *p++ = 0x00;
   }
   else
   /* pushl $0xFFFFFFFF */
   if (tags == 0xF) {
      *p++ = 0x6A;
      *p++ = 0xFF;
   } else {
      vassert(0); /* awaiting test case */
      w = 0;
      if (tags & 1) w |= 0x000000FF;
      if (tags & 2) w |= 0x0000FF00;
      if (tags & 4) w |= 0x00FF0000;
      if (tags & 8) w |= 0xFF000000;
      *p++ = 0x68;
      p = emit32(p, w);
   }
   return p;
}

/* Emit an instruction into buf and return the number of bytes used.
   Note that buf is not the insn's final place, and therefore it is
   imperative to emit position-independent code.  If the emitted
   instruction was a profiler inc, set *is_profInc to True, else
   leave it unchanged. */

Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc,
                    UChar* buf, Int nbuf, const X86Instr* i,
                    Bool mode64, VexEndness endness_host,
                    const void* disp_cp_chain_me_to_slowEP,
                    const void* disp_cp_chain_me_to_fastEP,
                    const void* disp_cp_xindir,
                    const void* disp_cp_xassisted )
{
   UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;

   UInt   xtra;
   UChar* p = &buf[0];
   UChar* ptmp;
   vassert(nbuf >= 32);
   vassert(mode64 == False);

   /* vex_printf("asm  ");ppX86Instr(i, mode64); vex_printf("\n"); */

   switch (i->tag) {

   case Xin_Alu32R:
      /* Deal specially with MOV */
      if (i->Xin.Alu32R.op == Xalu_MOV) {
         switch (i->Xin.Alu32R.src->tag) {
            case Xrmi_Imm:
               *p++ = toUChar(0xB8 + iregEnc(i->Xin.Alu32R.dst));
               p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
               goto done;
            case Xrmi_Reg:
               *p++ = 0x89;
               p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
                                i->Xin.Alu32R.dst);
               goto done;
            case Xrmi_Mem:
               *p++ = 0x8B;
               p = doAMode_M(p, i->Xin.Alu32R.dst,
                                i->Xin.Alu32R.src->Xrmi.Mem.am);
               goto done;
            default:
               goto bad;
         }
      }
      /* MUL */
      if (i->Xin.Alu32R.op == Xalu_MUL) {
         switch (i->Xin.Alu32R.src->tag) {
            case Xrmi_Reg:
               *p++ = 0x0F;
               *p++ = 0xAF;
               p = doAMode_R(p, i->Xin.Alu32R.dst,
                                i->Xin.Alu32R.src->Xrmi.Reg.reg);
               goto done;
            case Xrmi_Mem:
               *p++ = 0x0F;
               *p++ = 0xAF;
               p = doAMode_M(p, i->Xin.Alu32R.dst,
                                i->Xin.Alu32R.src->Xrmi.Mem.am);
               goto done;
            case Xrmi_Imm:
               if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
                  *p++ = 0x6B;
                  p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
                  *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
               } else {
                  *p++ = 0x69;
                  p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
                  p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
               }
               goto done;
            default:
               goto bad;
         }
      }
      /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
      opc = opc_rr = subopc_imm = opc_imma = 0;
      switch (i->Xin.Alu32R.op) {
         case Xalu_ADC: opc = 0x13; opc_rr = 0x11;
                        subopc_imm = 2; opc_imma = 0x15; break;
         case Xalu_ADD: opc = 0x03; opc_rr = 0x01;
                        subopc_imm = 0; opc_imma = 0x05; break;
         case Xalu_SUB: opc = 0x2B; opc_rr = 0x29;
                        subopc_imm = 5; opc_imma = 0x2D; break;
         case Xalu_SBB: opc = 0x1B; opc_rr = 0x19;
                        subopc_imm = 3; opc_imma = 0x1D; break;
         case Xalu_AND: opc = 0x23; opc_rr = 0x21;
                        subopc_imm = 4; opc_imma = 0x25; break;
         case Xalu_XOR: opc = 0x33; opc_rr = 0x31;
                        subopc_imm = 6; opc_imma = 0x35; break;
         case Xalu_OR:  opc = 0x0B; opc_rr = 0x09;
                        subopc_imm = 1; opc_imma = 0x0D; break;
         case Xalu_CMP: opc = 0x3B; opc_rr = 0x39;
                        subopc_imm = 7; opc_imma = 0x3D; break;
         default: goto bad;
      }
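      /* opc is the "reg <- reg/mem" opcode, opc_rr the "reg/mem <- reg"
         form, subopc_imm the /digit for the 0x83/0x81 immediate group,
         and opc_imma the short "op $imm32, %eax" encoding. */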
      switch (i->Xin.Alu32R.src->tag) {
         case Xrmi_Imm:
            if (sameHReg(i->Xin.Alu32R.dst, hregX86_EAX())
                && !fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
               *p++ = toUChar(opc_imma);
               p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
            } else
            if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
               *p++ = 0x83;
               p    = doAMode_R_enc_reg(p, subopc_imm, i->Xin.Alu32R.dst);
               *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
            } else {
               *p++ = 0x81;
               p    = doAMode_R_enc_reg(p, subopc_imm, i->Xin.Alu32R.dst);
               p    = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
            }
            goto done;
         case Xrmi_Reg:
            *p++ = toUChar(opc_rr);
            p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
                             i->Xin.Alu32R.dst);
            goto done;
         case Xrmi_Mem:
            *p++ = toUChar(opc);
            p = doAMode_M(p, i->Xin.Alu32R.dst,
                             i->Xin.Alu32R.src->Xrmi.Mem.am);
            goto done;
         default:
            goto bad;
      }
      break;

   case Xin_Alu32M:
      /* Deal specially with MOV */
      if (i->Xin.Alu32M.op == Xalu_MOV) {
         switch (i->Xin.Alu32M.src->tag) {
            case Xri_Reg:
               *p++ = 0x89;
               p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
                                i->Xin.Alu32M.dst);
               goto done;
            case Xri_Imm:
               *p++ = 0xC7;
               p = doAMode_M_enc(p, 0, i->Xin.Alu32M.dst);
               p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
               goto done;
            default:
               goto bad;
         }
      }
      /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP.  MUL is not
         allowed here. */
      opc = subopc_imm = opc_imma = 0;
      switch (i->Xin.Alu32M.op) {
         case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
         case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
         case Xalu_CMP: opc = 0x39; subopc_imm = 7; break;
         default: goto bad;
      }
      switch (i->Xin.Alu32M.src->tag) {
         case Xri_Reg:
            *p++ = toUChar(opc);
            p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
                             i->Xin.Alu32M.dst);
            goto done;
         case Xri_Imm:
            if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
               *p++ = 0x83;
               p    = doAMode_M_enc(p, subopc_imm, i->Xin.Alu32M.dst);
               *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32);
               goto done;
            } else {
               *p++ = 0x81;
               p    = doAMode_M_enc(p, subopc_imm, i->Xin.Alu32M.dst);
               p    = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
               goto done;
            }
         default:
            goto bad;
      }
      break;

   case Xin_Sh32:
      opc_cl = opc_imm = subopc = 0;
      switch (i->Xin.Sh32.op) {
         case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
         case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
         case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
         default: goto bad;
      }
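      /* A shift amount (src) of zero means a variable shift by %cl;
         otherwise emit the shift-by-imm8 form. */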
      if (i->Xin.Sh32.src == 0) {
         *p++ = toUChar(opc_cl);
         p = doAMode_R_enc_reg(p, subopc, i->Xin.Sh32.dst);
      } else {
         *p++ = toUChar(opc_imm);
         p = doAMode_R_enc_reg(p, subopc, i->Xin.Sh32.dst);
         *p++ = (UChar)(i->Xin.Sh32.src);
      }
      goto done;

   case Xin_Test32:
      if (i->Xin.Test32.dst->tag == Xrm_Reg) {
         /* testl $imm32, %reg */
         *p++ = 0xF7;
         p = doAMode_R_enc_reg(p, 0, i->Xin.Test32.dst->Xrm.Reg.reg);
         p = emit32(p, i->Xin.Test32.imm32);
         goto done;
      } else {
         /* testl $imm32, amode */
         *p++ = 0xF7;
         p = doAMode_M_enc(p, 0, i->Xin.Test32.dst->Xrm.Mem.am);
         p = emit32(p, i->Xin.Test32.imm32);
         goto done;
      }

   case Xin_Unary32:
      if (i->Xin.Unary32.op == Xun_NOT) {
         *p++ = 0xF7;
         p = doAMode_R_enc_reg(p, 2, i->Xin.Unary32.dst);
         goto done;
      }
      if (i->Xin.Unary32.op == Xun_NEG) {
         *p++ = 0xF7;
         p = doAMode_R_enc_reg(p, 3, i->Xin.Unary32.dst);
         goto done;
      }
      break;

   case Xin_Lea32:
      *p++ = 0x8D;
      p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am);
      goto done;

   case Xin_MulL:
      subopc = i->Xin.MulL.syned ? 5 : 4;
      *p++ = 0xF7;
      switch (i->Xin.MulL.src->tag)  {
         case Xrm_Mem:
            p = doAMode_M_enc(p, subopc, i->Xin.MulL.src->Xrm.Mem.am);
            goto done;
         case Xrm_Reg:
            p = doAMode_R_enc_reg(p, subopc, i->Xin.MulL.src->Xrm.Reg.reg);
            goto done;
         default:
            goto bad;
      }
      break;

   case Xin_Div:
      subopc = i->Xin.Div.syned ? 7 : 6;
      *p++ = 0xF7;
      switch (i->Xin.Div.src->tag)  {
         case Xrm_Mem:
            p = doAMode_M_enc(p, subopc, i->Xin.Div.src->Xrm.Mem.am);
            goto done;
         case Xrm_Reg:
            p = doAMode_R_enc_reg(p, subopc, i->Xin.Div.src->Xrm.Reg.reg);
            goto done;
         default:
            goto bad;
      }
      break;

   case Xin_Sh3232:
      vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
      if (i->Xin.Sh3232.amt == 0) {
         /* shldl/shrdl by %cl */
         *p++ = 0x0F;
         if (i->Xin.Sh3232.op == Xsh_SHL) {
            *p++ = 0xA5;
         } else {
            *p++ = 0xAD;
         }
         p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
         goto done;
      }
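      /* shldl/shrdl with an immediate count is not handled; fall
         through to "bad". */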
      break;

   case Xin_Push:
      switch (i->Xin.Push.src->tag) {
         case Xrmi_Mem:
            *p++ = 0xFF;
            p = doAMode_M_enc(p, 6, i->Xin.Push.src->Xrmi.Mem.am);
            goto done;
         case Xrmi_Imm:
            *p++ = 0x68;
            p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32);
            goto done;
         case Xrmi_Reg:
            *p++ = toUChar(0x50 + iregEnc(i->Xin.Push.src->Xrmi.Reg.reg));
            goto done;
         default:
            goto bad;
      }

   case Xin_Call:
      if (i->Xin.Call.cond != Xcc_ALWAYS
          && i->Xin.Call.rloc.pri != RLPri_None) {
         /* The call might not happen (it isn't unconditional) and it
            returns a result.  In this case we will need to generate a
            control flow diamond to put 0x555..555 in the return
            register(s) in the case where the call doesn't happen.  If
            this ever becomes necessary, maybe copy code from the ARM
            equivalent.  Until that day, just give up. */
         goto bad;
      }
      /* See detailed comment for Xin_Call in getRegUsage_X86Instr above
         for explanation of this. */
      switch (i->Xin.Call.regparms) {
         case 0: irno = iregEnc(hregX86_EAX()); break;
         case 1: irno = iregEnc(hregX86_EDX()); break;
         case 2: irno = iregEnc(hregX86_ECX()); break;
         case 3: irno = iregEnc(hregX86_EDI()); break;
         default: vpanic("emit_X86Instr:call:regparms");
      }
      /* jump over the following two insns if the condition does not
         hold */
      if (i->Xin.Call.cond != Xcc_ALWAYS) {
         *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1)));
         *p++ = 0x07; /* 7 bytes in the next two insns */
      }
      /* movl $target, %tmp */
      *p++ = toUChar(0xB8 + irno);
      p = emit32(p, i->Xin.Call.target);
      /* call *%tmp */
      *p++ = 0xFF;
      *p++ = toUChar(0xD0 + irno);
      goto done;

   case Xin_XDirect: {
      /* NB: what goes on here has to be very closely coordinated with the
         chainXDirect_X86 and unchainXDirect_X86 below. */
      /* We're generating chain-me requests here, so we need to be
         sure this is actually allowed -- no-redir translations can't
         use chain-me's.  Hence: */
      vassert(disp_cp_chain_me_to_slowEP != NULL);
      vassert(disp_cp_chain_me_to_fastEP != NULL);

      /* Use ptmp for backpatching conditional jumps. */
      ptmp = NULL;

      /* First off, if this is conditional, create a conditional
         jump over the rest of it. */
      if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
         /* jmp fwds if !condition */
         *p++ = toUChar(0x70 + (0xF & (i->Xin.XDirect.cond ^ 1)));
         ptmp = p; /* fill in this bit later */
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
      }

      /* Update the guest EIP. */
      /* movl $dstGA, amEIP */
      *p++ = 0xC7;
      p    = doAMode_M_enc(p, 0, i->Xin.XDirect.amEIP);
      p    = emit32(p, i->Xin.XDirect.dstGA);

      /* --- FIRST PATCHABLE BYTE follows --- */
      /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
         to) backs up the return address, so as to find the address of
         the first patchable byte.  So: don't change the length of the
         two instructions below. */
      /* movl $disp_cp_chain_me_to_{slow,fast}EP,%edx; */
      *p++ = 0xBA;
      const void* disp_cp_chain_me
               = i->Xin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
                                         : disp_cp_chain_me_to_slowEP;
      p = emit32(p, (UInt)(Addr)disp_cp_chain_me);
      /* call *%edx */
      *p++ = 0xFF;
      *p++ = 0xD2;
      /* --- END of PATCHABLE BYTES --- */

      /* Fix up the conditional jump, if there was one. */
      if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
         Int delta = p - ptmp;
         vassert(delta > 0 && delta < 40);
         *ptmp = toUChar(delta-1);
      }
      goto done;
   }

   case Xin_XIndir: {
      /* We're generating transfers that could lead indirectly to a
         chain-me, so we need to be sure this is actually allowed --
         no-redir translations are not allowed to reach normal
         translations without going through the scheduler.  That means
         no XDirects or XIndirs out from no-redir translations.
         Hence: */
      vassert(disp_cp_xindir != NULL);

      /* Use ptmp for backpatching conditional jumps. */
      ptmp = NULL;

      /* First off, if this is conditional, create a conditional
         jump over the rest of it. */
      if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
         /* jmp fwds if !condition */
         *p++ = toUChar(0x70 + (0xF & (i->Xin.XIndir.cond ^ 1)));
         ptmp = p; /* fill in this bit later */
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
      }

      /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
      *p++ = 0x89;
      p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP);

      /* movl $disp_indir, %edx */
      *p++ = 0xBA;
      p = emit32(p, (UInt)(Addr)disp_cp_xindir);
      /* jmp *%edx */
      *p++ = 0xFF;
      *p++ = 0xE2;

      /* Fix up the conditional jump, if there was one. */
      if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
         Int delta = p - ptmp;
         vassert(delta > 0 && delta < 40);
         *ptmp = toUChar(delta-1);
      }
      goto done;
   }

   case Xin_XAssisted: {
      /* Use ptmp for backpatching conditional jumps. */
      ptmp = NULL;

      /* First off, if this is conditional, create a conditional
         jump over the rest of it. */
      if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
         /* jmp fwds if !condition */
         *p++ = toUChar(0x70 + (0xF & (i->Xin.XAssisted.cond ^ 1)));
         ptmp = p; /* fill in this bit later */
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
      }

      /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
      *p++ = 0x89;
      p = doAMode_M(p, i->Xin.XAssisted.dstGA, i->Xin.XAssisted.amEIP);
      /* movl $magic_number, %ebp. */
      UInt trcval = 0;
      switch (i->Xin.XAssisted.jk) {
         case Ijk_ClientReq:    trcval = VEX_TRC_JMP_CLIENTREQ;    break;
         case Ijk_Sys_syscall:  trcval = VEX_TRC_JMP_SYS_SYSCALL;  break;
         case Ijk_Sys_int128:   trcval = VEX_TRC_JMP_SYS_INT128;   break;
         case Ijk_Sys_int129:   trcval = VEX_TRC_JMP_SYS_INT129;   break;
         case Ijk_Sys_int130:   trcval = VEX_TRC_JMP_SYS_INT130;   break;
         case Ijk_Sys_int145:   trcval = VEX_TRC_JMP_SYS_INT145;   break;
         case Ijk_Sys_int210:   trcval = VEX_TRC_JMP_SYS_INT210;   break;
         case Ijk_Sys_sysenter: trcval = VEX_TRC_JMP_SYS_SYSENTER; break;
         case Ijk_Yield:        trcval = VEX_TRC_JMP_YIELD;        break;
         case Ijk_EmWarn:       trcval = VEX_TRC_JMP_EMWARN;       break;
         case Ijk_MapFail:      trcval = VEX_TRC_JMP_MAPFAIL;      break;
         case Ijk_NoDecode:     trcval = VEX_TRC_JMP_NODECODE;     break;
         case Ijk_InvalICache:  trcval = VEX_TRC_JMP_INVALICACHE;  break;
         case Ijk_NoRedir:      trcval = VEX_TRC_JMP_NOREDIR;      break;
         case Ijk_SigTRAP:      trcval = VEX_TRC_JMP_SIGTRAP;      break;
         case Ijk_SigSEGV:      trcval = VEX_TRC_JMP_SIGSEGV;      break;
         case Ijk_Boring:       trcval = VEX_TRC_JMP_BORING;       break;
         /* We don't expect to see the following being assisted. */
         case Ijk_Ret:
         case Ijk_Call:
         /* fallthrough */
         default:
            ppIRJumpKind(i->Xin.XAssisted.jk);
            vpanic("emit_X86Instr.Xin_XAssisted: unexpected jump kind");
      }
      vassert(trcval != 0);
      *p++ = 0xBD;
      p = emit32(p, trcval);

      /* movl $disp_indir, %edx */
      *p++ = 0xBA;
      p = emit32(p, (UInt)(Addr)disp_cp_xassisted);
      /* jmp *%edx */
      *p++ = 0xFF;
      *p++ = 0xE2;

      /* Fix up the conditional jump, if there was one. */
      if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
         Int delta = p - ptmp;
         vassert(delta > 0 && delta < 40);
         *ptmp = toUChar(delta-1);
      }
      goto done;
   }

   case Xin_CMov32:
      vassert(i->Xin.CMov32.cond != Xcc_ALWAYS);

      /* This generates cmov, which is illegal on P54/P55. */
      /*
      *p++ = 0x0F;
      *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond));
      if (i->Xin.CMov32.src->tag == Xrm_Reg) {
         p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg);
         goto done;
      }
      if (i->Xin.CMov32.src->tag == Xrm_Mem) {
         p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am);
         goto done;
      }
      */

      /* Alternative version which works on any x86 variant. */
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (i->Xin.CMov32.cond ^ 1));
      *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
      ptmp = p;

      switch (i->Xin.CMov32.src->tag) {
         case Xrm_Reg:
            /* Big sigh.  This is movl E -> G ... */
            *p++ = 0x89;
            p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg,
                             i->Xin.CMov32.dst);
            break;
         case Xrm_Mem:
            /* ... whereas this is movl G -> E.  That's why the args
               to doAMode_R appear to be the wrong way round in the
               Xrm_Reg case. */
            *p++ = 0x8B;
            p = doAMode_M(p, i->Xin.CMov32.dst,
                             i->Xin.CMov32.src->Xrm.Mem.am);
            break;
         default:
            goto bad;
      }
      /* Fill in the jump offset. */
      *(ptmp-1) = toUChar(p - ptmp);
      goto done;

   case Xin_LoadEX:
      if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) {
         /* movzbl */
         *p++ = 0x0F;
         *p++ = 0xB6;
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
         goto done;
      }
      if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) {
         /* movzwl */
         *p++ = 0x0F;
         *p++ = 0xB7;
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
         goto done;
      }
      if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) {
         /* movsbl */
         *p++ = 0x0F;
         *p++ = 0xBE;
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
         goto done;
      }
      break;

   case Xin_Set32:
      /* Make the destination register be 1 or 0, depending on whether
         the relevant condition holds.  We have to dodge and weave
         when the destination is %esi or %edi as we cannot directly
         emit the native 'setb %reg' for those.  Further complication:
         the top 24 bits of the destination should be forced to zero,
         but doing 'xor %r,%r' kills the flag(s) we are about to read.
         Sigh.  So start off by moving $0 into the dest. */

      /* Do we need to swap in %eax? */
      if (iregEnc(i->Xin.Set32.dst) >= 4) {
         /* xchg %eax, %dst */
         *p++ = toUChar(0x90 + iregEnc(i->Xin.Set32.dst));
         /* movl $0, %eax */
         *p++ = toUChar(0xB8 + iregEnc(hregX86_EAX()));
         p = emit32(p, 0);
         /* setb lo8(%eax) */
         *p++ = 0x0F;
         *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
         p = doAMode_R_enc_reg(p, 0, hregX86_EAX());
         /* xchg %eax, %dst */
         *p++ = toUChar(0x90 + iregEnc(i->Xin.Set32.dst));
      } else {
         /* movl $0, %dst */
         *p++ = toUChar(0xB8 + iregEnc(i->Xin.Set32.dst));
         p = emit32(p, 0);
         /* setb lo8(%dst) */
         *p++ = 0x0F;
         *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
         p = doAMode_R_enc_reg(p, 0, i->Xin.Set32.dst);
      }
      goto done;

   case Xin_Bsfr32:
      *p++ = 0x0F;
      if (i->Xin.Bsfr32.isFwds) {
         *p++ = 0xBC;
      } else {
         *p++ = 0xBD;
      }
      p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src);
      goto done;

   case Xin_MFence:
      /* see comment in hdefs.h re this insn */
      if (0) vex_printf("EMIT FENCE\n");
      if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3
                                  |VEX_HWCAPS_X86_SSE2)) {
         /* mfence */
         *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
         goto done;
      }
      if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_MMXEXT) {
         /* sfence */
         *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
         /* lock addl $0,0(%esp) */
         *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
         *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
         goto done;
      }
      if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) {
         /* lock addl $0,0(%esp) */
         *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
         *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
         goto done;
      }
      vpanic("emit_X86Instr:mfence:hwcaps");
      /*NOTREACHED*/
      break;

   case Xin_ACAS:
      /* lock */
      *p++ = 0xF0;
      /* cmpxchg{b,w,l} %ebx,mem.  Expected-value in %eax, new value
         in %ebx.  The new-value register is hardwired to be %ebx
         since letting it be any integer register gives the problem
         that %sil and %dil are unaddressable on x86 and hence we
         would have to resort to the same kind of trickery as with
         byte-sized Xin.Store, just below.  Given that this isn't
         performance critical, it is simpler just to force the
         register operand to %ebx (could equally be %ecx or %edx).
         (Although %ebx is more consistent with cmpxchg8b.) */
2758       if (i->Xin.ACAS.sz == 2) *p++ = 0x66;
2759       *p++ = 0x0F;
2760       if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
2761       p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr);
2762       goto done;
2763 
2764    case Xin_DACAS:
2765       /* lock */
2766       *p++ = 0xF0;
2767       /* cmpxchg8b m64.  Expected-value in %edx:%eax, new value
2768          in %ecx:%ebx.  All 4 regs are hardwired in the ISA, so
2769          aren't encoded in the insn. */
2770       *p++ = 0x0F;
2771       *p++ = 0xC7;
2772       p = doAMode_M_enc(p, 1, i->Xin.DACAS.addr);
2773       goto done;
2774 
2775    case Xin_Store:
2776       if (i->Xin.Store.sz == 2) {
2777          /* This case, at least, is simple, given that we can
2778             reference the low 16 bits of any integer register. */
2779          *p++ = 0x66;
2780          *p++ = 0x89;
2781          p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
2782          goto done;
2783       }
2784 
2785       if (i->Xin.Store.sz == 1) {
2786          /* We have to do complex dodging and weaving if src is not
2787             the low 8 bits of %eax/%ebx/%ecx/%edx. */
2788          if (iregEnc(i->Xin.Store.src) < 4) {
2789             /* we're OK, can do it directly */
2790             *p++ = 0x88;
2791             p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
2792            goto done;
2793          } else {
2794             /* Bleh.  This means the source is %edi or %esi.  Since
2795                the address mode can only mention three registers, at
2796                least one of %eax/%ebx/%ecx/%edx must be available to
2797                temporarily swap the source into, so the store can
2798                happen.  So we have to look at the regs mentioned
2799                in the amode. */
2800             HReg swap = INVALID_HREG;
2801             HReg  eax = hregX86_EAX(), ebx = hregX86_EBX(),
2802                   ecx = hregX86_ECX(), edx = hregX86_EDX();
2803             HRegUsage u;
2804             initHRegUsage(&u);
2805             addRegUsage_X86AMode(&u, i->Xin.Store.dst);
2806             /**/ if (! HRegUsage__contains(&u, eax)) { swap = eax; }
2807             else if (! HRegUsage__contains(&u, ebx)) { swap = ebx; }
2808             else if (! HRegUsage__contains(&u, ecx)) { swap = ecx; }
2809             else if (! HRegUsage__contains(&u, edx)) { swap = edx; }
2810             vassert(! hregIsInvalid(swap));
2811             /* xchgl %source, %swap. Could do better if swap is %eax. */
2812             *p++ = 0x87;
2813             p = doAMode_R(p, i->Xin.Store.src, swap);
2814             /* movb lo8{%swap}, (dst) */
2815             *p++ = 0x88;
2816             p = doAMode_M(p, swap, i->Xin.Store.dst);
2817             /* xchgl %source, %swap. Could do better if swap is %eax. */
2818             *p++ = 0x87;
2819             p = doAMode_R(p, i->Xin.Store.src, swap);
2820             goto done;
2821          }
2822       } /* if (i->Xin.Store.sz == 1) */
2823       break;
2824 
2825    case Xin_FpUnary:
2826       /* gop %src, %dst
2827          --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
2828       */
2829       p = do_ffree_st7(p);
2830       p = do_fld_st(p, 0+fregEnc(i->Xin.FpUnary.src));
2831       p = do_fop1_st(p, i->Xin.FpUnary.op);
2832       p = do_fstp_st(p, 1+fregEnc(i->Xin.FpUnary.dst));
2833       goto done;
2834 
2835    case Xin_FpBinary:
2836       if (i->Xin.FpBinary.op == Xfp_YL2X
2837           || i->Xin.FpBinary.op == Xfp_YL2XP1) {
2838          /* Have to do this specially. */
2839          /* ffree %st7 ; fld %st(srcL) ;
2840             ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
2841          p = do_ffree_st7(p);
2842          p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL));
2843          p = do_ffree_st7(p);
2844          p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcR));
2845          *p++ = 0xD9;
2846          *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9);
2847          p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst));
2848          goto done;
2849       }
2850       if (i->Xin.FpBinary.op == Xfp_ATAN) {
2851          /* Have to do this specially. */
2852          /* ffree %st7 ; fld %st(srcL) ;
2853             ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
2854          p = do_ffree_st7(p);
2855          p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL));
2856          p = do_ffree_st7(p);
2857          p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcR));
2858          *p++ = 0xD9; *p++ = 0xF3;
2859          p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst));
2860          goto done;
2861       }
2862       if (i->Xin.FpBinary.op == Xfp_PREM
2863           || i->Xin.FpBinary.op == Xfp_PREM1
2864           || i->Xin.FpBinary.op == Xfp_SCALE) {
2865          /* Have to do this specially. */
2866          /* ffree %st7 ; fld %st(srcR) ;
2867             ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
2868             fincstp ; ffree %st7 */
2869          p = do_ffree_st7(p);
2870          p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcR));
2871          p = do_ffree_st7(p);
2872          p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcL));
2873          *p++ = 0xD9;
2874          switch (i->Xin.FpBinary.op) {
2875             case Xfp_PREM: *p++ = 0xF8; break;
2876             case Xfp_PREM1: *p++ = 0xF5; break;
2877             case Xfp_SCALE: *p++ = 0xFD; break;
2878             default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)");
2879          }
2880          p = do_fstp_st(p, 2+fregEnc(i->Xin.FpBinary.dst));
2881          *p++ = 0xD9; *p++ = 0xF7;
2882          p = do_ffree_st7(p);
2883          goto done;
2884       }
2885       /* General case */
2886       /* gop %srcL, %srcR, %dst
2887          --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
2888       */
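      /* The '1+' offsets compensate for the fld: it pushes a copy of
         srcL, so every register previously visible as %st(k) is now
         %st(k+1). */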
2889       p = do_ffree_st7(p);
2890       p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL));
2891       p = do_fop2_st(p, i->Xin.FpBinary.op,
2892                         1+fregEnc(i->Xin.FpBinary.srcR));
2893       p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst));
2894       goto done;
2895 
2896    case Xin_FpLdSt:
2897       if (i->Xin.FpLdSt.isLoad) {
2898          /* Load from memory into %fakeN.
2899             --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1)
2900          */
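         /* The opcode/subopcode pairs below are the standard x87
            memory-load forms: D9 /0 = flds (32 bit), DD /0 = fldl
            (64 bit), DB /5 = fldt (80 bit). */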
2901          p = do_ffree_st7(p);
2902          switch (i->Xin.FpLdSt.sz) {
2903             case 4:
2904                *p++ = 0xD9;
2905                p = doAMode_M_enc(p, 0/*subopcode*/, i->Xin.FpLdSt.addr);
2906                break;
2907             case 8:
2908                *p++ = 0xDD;
2909                p = doAMode_M_enc(p, 0/*subopcode*/, i->Xin.FpLdSt.addr);
2910                break;
2911             case 10:
2912                *p++ = 0xDB;
2913                p = doAMode_M_enc(p, 5/*subopcode*/, i->Xin.FpLdSt.addr);
2914                break;
2915             default:
2916                vpanic("emitX86Instr(FpLdSt,load)");
2917          }
2918          p = do_fstp_st(p, 1+fregEnc(i->Xin.FpLdSt.reg));
2919          goto done;
2920       } else {
2921          /* Store from %fakeN into memory.
2922             --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode
2923          */
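         /* Store forms: D9 /3 = fstps (32 bit), DD /3 = fstpl (64 bit),
            DB /7 = fstpt (80 bit). */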
2924          p = do_ffree_st7(p);
2925          p = do_fld_st(p, 0+fregEnc(i->Xin.FpLdSt.reg));
2926          switch (i->Xin.FpLdSt.sz) {
2927             case 4:
2928                *p++ = 0xD9;
2929                p = doAMode_M_enc(p, 3/*subopcode*/, i->Xin.FpLdSt.addr);
2930                break;
2931             case 8:
2932                *p++ = 0xDD;
2933                p = doAMode_M_enc(p, 3/*subopcode*/, i->Xin.FpLdSt.addr);
2934                break;
2935             case 10:
2936                *p++ = 0xDB;
2937                p = doAMode_M_enc(p, 7/*subopcode*/, i->Xin.FpLdSt.addr);
2938                break;
2939             default:
2940                vpanic("emitX86Instr(FpLdSt,store)");
2941          }
2942          goto done;
2943       }
2944       break;
2945 
2946    case Xin_FpLdStI:
2947       if (i->Xin.FpLdStI.isLoad) {
2948          /* Load from memory into %fakeN, converting from an int.
2949             --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
2950          */
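         /* fild forms: DB /0 = fildl (32 bit), DF /5 = fildll (64 bit);
            DF /0 would be fildw (16 bit), but that case is unused. */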
2951          switch (i->Xin.FpLdStI.sz) {
2952             case 8:  opc = 0xDF; subopc_imm = 5; break;
2953             case 4:  opc = 0xDB; subopc_imm = 0; break;
2954             case 2:  vassert(0); opc = 0xDF; subopc_imm = 0; break; /* never used */
2955             default: vpanic("emitX86Instr(Xin_FpLdStI-load)");
2956          }
2957          p = do_ffree_st7(p);
2958          *p++ = toUChar(opc);
2959          p = doAMode_M_enc(p, subopc_imm/*subopcode*/, i->Xin.FpLdStI.addr);
2960          p = do_fstp_st(p, 1+fregEnc(i->Xin.FpLdStI.reg));
2961          goto done;
2962       } else {
2963          /* Store from %fakeN into memory, converting to an int.
2964             --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
2965          */
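         /* fistp forms: DF /3 = fistpw (16 bit), DB /3 = fistpl
            (32 bit), DF /7 = fistpll (64 bit). */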
2966          switch (i->Xin.FpLdStI.sz) {
2967             case 8:  opc = 0xDF; subopc_imm = 7; break;
2968             case 4:  opc = 0xDB; subopc_imm = 3; break;
2969             case 2:  opc = 0xDF; subopc_imm = 3; break;
2970             default: vpanic("emitX86Instr(Xin_FpLdStI-store)");
2971          }
2972          p = do_ffree_st7(p);
2973          p = do_fld_st(p, 0+fregEnc(i->Xin.FpLdStI.reg));
2974          *p++ = toUChar(opc);
2975          p = doAMode_M_enc(p, subopc_imm/*subopcode*/, i->Xin.FpLdStI.addr);
2976          goto done;
2977       }
2978       break;
2979 
2980    case Xin_Fp64to32:
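      /* Round to single precision by bouncing the value through a
         4-byte stack slot: fstps narrows to 32 bits, flds widens the
         result back, and the final fstp parks it in dst. */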
2981       /* ffree %st7 ; fld %st(src) */
2982       p = do_ffree_st7(p);
2983       p = do_fld_st(p, 0+fregEnc(i->Xin.Fp64to32.src));
2984       /* subl $4, %esp */
2985       *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
2986       /* fstps (%esp) */
2987       *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
2988       /* flds (%esp) */
2989       *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
2990       /* addl $4, %esp */
2991       *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
2992       /* fstp %st(1+dst) */
2993       p = do_fstp_st(p, 1+fregEnc(i->Xin.Fp64to32.dst));
2994       goto done;
2995 
2996    case Xin_FpCMov:
2997       /* jmp fwds if !condition */
2998       *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1));
2999       *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
3000       ptmp = p;
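      /* The one-byte displacement limits the skipped sequence to 127
         bytes; the few x87 ops below are far smaller.  The same trick
         is used for Xin_SseCMov further down. */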
3001 
3002       /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
3003       p = do_ffree_st7(p);
3004       p = do_fld_st(p, 0+fregEnc(i->Xin.FpCMov.src));
3005       p = do_fstp_st(p, 1+fregEnc(i->Xin.FpCMov.dst));
3006 
3007       /* Fill in the jump offset. */
3008       *(ptmp-1) = toUChar(p - ptmp);
3009       goto done;
3010 
3011    case Xin_FpLdCW:
3012       *p++ = 0xD9;
3013       p = doAMode_M_enc(p, 5/*subopcode*/, i->Xin.FpLdCW.addr);
3014       goto done;
3015 
3016    case Xin_FpStSW_AX:
3017       /* note, this emits fnstsw %ax, not fstsw %ax */
3018       *p++ = 0xDF;
3019       *p++ = 0xE0;
3020       goto done;
3021 
3022    case Xin_FpCmp:
3023       /* gcmp %fL, %fR, %dst
3024          -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
3025             fnstsw %ax ; movl %eax, %dst
3026       */
3027       /* ffree %st7 */
3028       p = do_ffree_st7(p);
3029       /* fpush %fL */
3030       p = do_fld_st(p, 0+fregEnc(i->Xin.FpCmp.srcL));
3031       /* fucomp %(fR+1) */
3032       *p++ = 0xDD;
3033       *p++ = toUChar(0xE8 + (7 & (1+fregEnc(i->Xin.FpCmp.srcR))));
3034       /* fnstsw %ax */
3035       *p++ = 0xDF;
3036       *p++ = 0xE0;
3037       /* movl %eax, %dst */
3038       *p++ = 0x89;
3039       p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst);
3040       goto done;
3041 
3042    case Xin_SseConst: {
3043       UShort con = i->Xin.SseConst.con;
3044       p = push_word_from_tags(p, toUShort((con >> 12) & 0xF));
3045       p = push_word_from_tags(p, toUShort((con >> 8) & 0xF));
3046       p = push_word_from_tags(p, toUShort((con >> 4) & 0xF));
3047       p = push_word_from_tags(p, toUShort(con & 0xF));
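      /* Each call pushes one 32-bit word built from a 4-bit tag; the
         high-nibble word goes first, so the low-nibble word ends up at
         (%esp), i.e. in lane 0 of the vector loaded below. */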
3048       /* movups (%esp), %xmm-dst */
3049       *p++ = 0x0F;
3050       *p++ = 0x10;
3051       *p++ = toUChar(0x04 + 8 * (7 & vregEnc(i->Xin.SseConst.dst)));
3052       *p++ = 0x24;
3053       /* addl $16, %esp */
3054       *p++ = 0x83;
3055       *p++ = 0xC4;
3056       *p++ = 0x10;
3057       goto done;
3058    }
3059 
3060    case Xin_SseLdSt:
3061       *p++ = 0x0F;
3062       *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11);
3063       p = doAMode_M_enc(p, vregEnc(i->Xin.SseLdSt.reg), i->Xin.SseLdSt.addr);
3064       goto done;
3065 
3066    case Xin_SseLdzLO:
3067       vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8);
3068       /* movs[sd] amode, %xmm-dst */
3069       *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
3070       *p++ = 0x0F;
3071       *p++ = 0x10;
3072       p = doAMode_M_enc(p, vregEnc(i->Xin.SseLdzLO.reg), i->Xin.SseLdzLO.addr);
3073       goto done;
3074 
3075    case Xin_Sse32Fx4:
3076       xtra = 0;
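      /* For the CMPxxF cases, bit 8 of 'xtra' records that an imm8
         comparison predicate (the low byte of 'xtra') must follow the
         ModRM byte; the same scheme is used in the three cases below. */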
3077       *p++ = 0x0F;
3078       switch (i->Xin.Sse32Fx4.op) {
3079          case Xsse_ADDF:   *p++ = 0x58; break;
3080          case Xsse_DIVF:   *p++ = 0x5E; break;
3081          case Xsse_MAXF:   *p++ = 0x5F; break;
3082          case Xsse_MINF:   *p++ = 0x5D; break;
3083          case Xsse_MULF:   *p++ = 0x59; break;
3084          case Xsse_RCPF:   *p++ = 0x53; break;
3085          case Xsse_RSQRTF: *p++ = 0x52; break;
3086          case Xsse_SQRTF:  *p++ = 0x51; break;
3087          case Xsse_SUBF:   *p++ = 0x5C; break;
3088          case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3089          case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3090          case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3091          case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3092          default: goto bad;
3093       }
3094       p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse32Fx4.dst),
3095                                vregEnc(i->Xin.Sse32Fx4.src) );
3096       if (xtra & 0x100)
3097          *p++ = toUChar(xtra & 0xFF);
3098       goto done;
3099 
3100    case Xin_Sse64Fx2:
3101       xtra = 0;
3102       *p++ = 0x66;
3103       *p++ = 0x0F;
3104       switch (i->Xin.Sse64Fx2.op) {
3105          case Xsse_ADDF:   *p++ = 0x58; break;
3106          case Xsse_DIVF:   *p++ = 0x5E; break;
3107          case Xsse_MAXF:   *p++ = 0x5F; break;
3108          case Xsse_MINF:   *p++ = 0x5D; break;
3109          case Xsse_MULF:   *p++ = 0x59; break;
3110          case Xsse_RCPF:   *p++ = 0x53; break;
3111          case Xsse_RSQRTF: *p++ = 0x52; break;
3112          case Xsse_SQRTF:  *p++ = 0x51; break;
3113          case Xsse_SUBF:   *p++ = 0x5C; break;
3114          case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3115          case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3116          case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3117          case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3118          default: goto bad;
3119       }
3120       p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse64Fx2.dst),
3121                                vregEnc(i->Xin.Sse64Fx2.src) );
3122       if (xtra & 0x100)
3123          *p++ = toUChar(xtra & 0xFF);
3124       goto done;
3125 
3126    case Xin_Sse32FLo:
3127       xtra = 0;
3128       *p++ = 0xF3;
3129       *p++ = 0x0F;
3130       switch (i->Xin.Sse32FLo.op) {
3131          case Xsse_ADDF:   *p++ = 0x58; break;
3132          case Xsse_DIVF:   *p++ = 0x5E; break;
3133          case Xsse_MAXF:   *p++ = 0x5F; break;
3134          case Xsse_MINF:   *p++ = 0x5D; break;
3135          case Xsse_MULF:   *p++ = 0x59; break;
3136          case Xsse_RCPF:   *p++ = 0x53; break;
3137          case Xsse_RSQRTF: *p++ = 0x52; break;
3138          case Xsse_SQRTF:  *p++ = 0x51; break;
3139          case Xsse_SUBF:   *p++ = 0x5C; break;
3140          case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3141          case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3142          case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3143          case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3144          default: goto bad;
3145       }
3146       p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse32FLo.dst),
3147                                vregEnc(i->Xin.Sse32FLo.src) );
3148       if (xtra & 0x100)
3149          *p++ = toUChar(xtra & 0xFF);
3150       goto done;
3151 
3152    case Xin_Sse64FLo:
3153       xtra = 0;
3154       *p++ = 0xF2;
3155       *p++ = 0x0F;
3156       switch (i->Xin.Sse64FLo.op) {
3157          case Xsse_ADDF:   *p++ = 0x58; break;
3158          case Xsse_DIVF:   *p++ = 0x5E; break;
3159          case Xsse_MAXF:   *p++ = 0x5F; break;
3160          case Xsse_MINF:   *p++ = 0x5D; break;
3161          case Xsse_MULF:   *p++ = 0x59; break;
3162          case Xsse_RCPF:   *p++ = 0x53; break;
3163          case Xsse_RSQRTF: *p++ = 0x52; break;
3164          case Xsse_SQRTF:  *p++ = 0x51; break;
3165          case Xsse_SUBF:   *p++ = 0x5C; break;
3166          case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3167          case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3168          case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3169          case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3170          default: goto bad;
3171       }
3172       p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse64FLo.dst),
3173                                vregEnc(i->Xin.Sse64FLo.src) );
3174       if (xtra & 0x100)
3175          *p++ = toUChar(xtra & 0xFF);
3176       goto done;
3177 
3178    case Xin_SseReRg:
3179 #     define XX(_n) *p++ = (_n)
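      /* The 66-prefixed opcodes are the SSE2 integer ops; the bare 0F
         forms (MOV/AND/OR/XOR) are the whole-register SSE1 ops
         movups/andps/orps/xorps. */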
3180       switch (i->Xin.SseReRg.op) {
3181          case Xsse_MOV:     /*movups*/ XX(0x0F); XX(0x10); break;
3182          case Xsse_OR:                 XX(0x0F); XX(0x56); break;
3183          case Xsse_XOR:                XX(0x0F); XX(0x57); break;
3184          case Xsse_AND:                XX(0x0F); XX(0x54); break;
3185          case Xsse_PACKSSD:  XX(0x66); XX(0x0F); XX(0x6B); break;
3186          case Xsse_PACKSSW:  XX(0x66); XX(0x0F); XX(0x63); break;
3187          case Xsse_PACKUSW:  XX(0x66); XX(0x0F); XX(0x67); break;
3188          case Xsse_ADD8:     XX(0x66); XX(0x0F); XX(0xFC); break;
3189          case Xsse_ADD16:    XX(0x66); XX(0x0F); XX(0xFD); break;
3190          case Xsse_ADD32:    XX(0x66); XX(0x0F); XX(0xFE); break;
3191          case Xsse_ADD64:    XX(0x66); XX(0x0F); XX(0xD4); break;
3192          case Xsse_QADD8S:   XX(0x66); XX(0x0F); XX(0xEC); break;
3193          case Xsse_QADD16S:  XX(0x66); XX(0x0F); XX(0xED); break;
3194          case Xsse_QADD8U:   XX(0x66); XX(0x0F); XX(0xDC); break;
3195          case Xsse_QADD16U:  XX(0x66); XX(0x0F); XX(0xDD); break;
3196          case Xsse_AVG8U:    XX(0x66); XX(0x0F); XX(0xE0); break;
3197          case Xsse_AVG16U:   XX(0x66); XX(0x0F); XX(0xE3); break;
3198          case Xsse_CMPEQ8:   XX(0x66); XX(0x0F); XX(0x74); break;
3199          case Xsse_CMPEQ16:  XX(0x66); XX(0x0F); XX(0x75); break;
3200          case Xsse_CMPEQ32:  XX(0x66); XX(0x0F); XX(0x76); break;
3201          case Xsse_CMPGT8S:  XX(0x66); XX(0x0F); XX(0x64); break;
3202          case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break;
3203          case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break;
3204          case Xsse_MAX16S:   XX(0x66); XX(0x0F); XX(0xEE); break;
3205          case Xsse_MAX8U:    XX(0x66); XX(0x0F); XX(0xDE); break;
3206          case Xsse_MIN16S:   XX(0x66); XX(0x0F); XX(0xEA); break;
3207          case Xsse_MIN8U:    XX(0x66); XX(0x0F); XX(0xDA); break;
3208          case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break;
3209          case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break;
3210          case Xsse_MUL16:    XX(0x66); XX(0x0F); XX(0xD5); break;
3211          case Xsse_SHL16:    XX(0x66); XX(0x0F); XX(0xF1); break;
3212          case Xsse_SHL32:    XX(0x66); XX(0x0F); XX(0xF2); break;
3213          case Xsse_SHL64:    XX(0x66); XX(0x0F); XX(0xF3); break;
3214          case Xsse_SAR16:    XX(0x66); XX(0x0F); XX(0xE1); break;
3215          case Xsse_SAR32:    XX(0x66); XX(0x0F); XX(0xE2); break;
3216          case Xsse_SHR16:    XX(0x66); XX(0x0F); XX(0xD1); break;
3217          case Xsse_SHR32:    XX(0x66); XX(0x0F); XX(0xD2); break;
3218          case Xsse_SHR64:    XX(0x66); XX(0x0F); XX(0xD3); break;
3219          case Xsse_SUB8:     XX(0x66); XX(0x0F); XX(0xF8); break;
3220          case Xsse_SUB16:    XX(0x66); XX(0x0F); XX(0xF9); break;
3221          case Xsse_SUB32:    XX(0x66); XX(0x0F); XX(0xFA); break;
3222          case Xsse_SUB64:    XX(0x66); XX(0x0F); XX(0xFB); break;
3223          case Xsse_QSUB8S:   XX(0x66); XX(0x0F); XX(0xE8); break;
3224          case Xsse_QSUB16S:  XX(0x66); XX(0x0F); XX(0xE9); break;
3225          case Xsse_QSUB8U:   XX(0x66); XX(0x0F); XX(0xD8); break;
3226          case Xsse_QSUB16U:  XX(0x66); XX(0x0F); XX(0xD9); break;
3227          case Xsse_UNPCKHB:  XX(0x66); XX(0x0F); XX(0x68); break;
3228          case Xsse_UNPCKHW:  XX(0x66); XX(0x0F); XX(0x69); break;
3229          case Xsse_UNPCKHD:  XX(0x66); XX(0x0F); XX(0x6A); break;
3230          case Xsse_UNPCKHQ:  XX(0x66); XX(0x0F); XX(0x6D); break;
3231          case Xsse_UNPCKLB:  XX(0x66); XX(0x0F); XX(0x60); break;
3232          case Xsse_UNPCKLW:  XX(0x66); XX(0x0F); XX(0x61); break;
3233          case Xsse_UNPCKLD:  XX(0x66); XX(0x0F); XX(0x62); break;
3234          case Xsse_UNPCKLQ:  XX(0x66); XX(0x0F); XX(0x6C); break;
3235          default: goto bad;
3236       }
3237       p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseReRg.dst),
3238                                vregEnc(i->Xin.SseReRg.src) );
3239 #     undef XX
3240       goto done;
3241 
3242    case Xin_SseCMov:
3243       /* jmp fwds if !condition */
3244       *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1));
3245       *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
3246       ptmp = p;
3247 
3248       /* movaps %src, %dst */
3249       *p++ = 0x0F;
3250       *p++ = 0x28;
3251       p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseCMov.dst),
3252                                vregEnc(i->Xin.SseCMov.src) );
3253 
3254       /* Fill in the jump offset. */
3255       *(ptmp-1) = toUChar(p - ptmp);
3256       goto done;
3257 
3258    case Xin_SseShuf:
3259       *p++ = 0x66;
3260       *p++ = 0x0F;
3261       *p++ = 0x70;
3262       p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseShuf.dst),
3263                                vregEnc(i->Xin.SseShuf.src) );
3264       *p++ = (UChar)(i->Xin.SseShuf.order);
3265       goto done;
3266 
3267    case Xin_EvCheck: {
3268       /* We generate:
3269             (3 bytes)  decl 4(%ebp)    4 == offsetof(host_EvC_COUNTER)
3270             (2 bytes)  jns  nofail     expected taken
3271             (3 bytes)  jmp* 0(%ebp)    0 == offsetof(host_EvC_FAILADDR)
3272             nofail:
3273       */
3274       /* This is heavily asserted re instruction lengths.  It needs to
3275          be.  If we get given unexpected forms of .amCounter or
3276          .amFailAddr -- basically, anything that's not of the form
3277          uimm7(%ebp) -- the length assertions below are likely to fail. */
3278       /* Note also that after the decl we must be very careful not to
3279          read the carry flag, else we get a partial flags stall.
3280          js/jns avoids that, though. */
3281       UChar* p0 = p;
3282       /* --- decl 4(%ebp) --- */
3283       /* "1" because there's no register in this encoding; instead
3284          the register field is used as a sub opcode.  The encoding
3285          for "decl r/m32" is FF /1, hence the "1". */
3286       *p++ = 0xFF;
3287       p = doAMode_M_enc(p, 1, i->Xin.EvCheck.amCounter);
3288       vassert(p - p0 == 3);
3289       /* --- jns nofail --- */
3290       *p++ = 0x79;
3291       *p++ = 0x03; /* need to check this 0x03 after the next insn */
3292       vassert(p - p0 == 5);
3293       /* --- jmp* 0(%ebp) --- */
3294       /* The encoding is FF /4. */
3295       *p++ = 0xFF;
3296       p = doAMode_M_enc(p, 4, i->Xin.EvCheck.amFailAddr);
3297       vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
3298       /* And crosscheck .. */
3299       vassert(evCheckSzB_X86() == 8);
3300       goto done;
3301    }
3302 
3303    case Xin_ProfInc: {
3304       /* We generate   addl $1,NotKnownYet
3305                        adcl $0,NotKnownYet+4
3306          in the expectation that a later call to LibVEX_patchProfCtr
3307          will be used to fill in the immediate fields once the right
3308          value is known.
3309            83 05  00 00 00 00  01
3310            83 15  00 00 00 00  00
3311       */
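      /* 83 /0 ib is "addl $imm8, m32" and 83 /2 ib is "adcl $imm8, m32";
         ModRM 05 resp. 15 selects disp32 absolute addressing.  The
         add/adc pair carries the increment into the upper half of the
         64-bit counter. */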
3312       *p++ = 0x83; *p++ = 0x05;
3313       *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3314       *p++ = 0x01;
3315       *p++ = 0x83; *p++ = 0x15;
3316       *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3317       *p++ = 0x00;
3318       /* Tell the caller .. */
3319       vassert(!(*is_profInc));
3320       *is_profInc = True;
3321       goto done;
3322    }
3323 
3324    default:
3325       goto bad;
3326    }
3327 
3328   bad:
3329    ppX86Instr(i, mode64);
3330    vpanic("emit_X86Instr");
3331    /*NOTREACHED*/
3332 
3333   done:
3334    vassert(p - &buf[0] <= 32);
3335    return p - &buf[0];
3336 }
3337 
3338 
3339 /* How big is an event check?  See case for Xin_EvCheck in
3340    emit_X86Instr just above.  That crosschecks what this returns, so
3341    we can tell if we're inconsistent. */
3342 Int evCheckSzB_X86 (void)
3343 {
3344    return 8;
3345 }
3346 
3347 
3348 /* NB: what goes on here has to be very closely coordinated with the
3349    emitInstr case for XDirect, above. */
3350 VexInvalRange chainXDirect_X86 ( VexEndness endness_host,
3351                                  void* place_to_chain,
3352                                  const void* disp_cp_chain_me_EXPECTED,
3353                                  const void* place_to_jump_to )
3354 {
3355    vassert(endness_host == VexEndnessLE);
3356 
3357    /* What we're expecting to see is:
3358         movl $disp_cp_chain_me_EXPECTED, %edx
3359         call *%edx
3360       viz
3361         BA <4 bytes value == disp_cp_chain_me_EXPECTED>
3362         FF D2
3363    */
3364    UChar* p = (UChar*)place_to_chain;
3365    vassert(p[0] == 0xBA);
3366    vassert(read_misaligned_UInt_LE(&p[1])
3367            == (UInt)(Addr)disp_cp_chain_me_EXPECTED);
3368    vassert(p[5] == 0xFF);
3369    vassert(p[6] == 0xD2);
3370    /* And what we want to change it to is:
3371           jmp disp32   where disp32 is relative to the next insn
3372           ud2;
3373         viz
3374           E9 <4 bytes == disp32>
3375           0F 0B
3376       The replacement has the same length as the original.
3377    */
3378    /* This is the delta we need to put into a JMP d32 insn.  It's
3379       relative to the start of the next insn, hence the -5.  */
3380    Long delta = (Long)((const UChar *)place_to_jump_to - p) - 5;
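   /* For example (illustrative): chaining a jmp at 0x1000 to a target
      at 0x2000 gives delta = 0x2000 - 0x1000 - 5 = 0xFFB, emitted as
      E9 FB 0F 00 00. */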
3381 
3382    /* And make the modifications. */
3383    p[0] = 0xE9;
3384    write_misaligned_UInt_LE(&p[1], (UInt)(ULong)delta);
3385    p[5] = 0x0F; p[6] = 0x0B;
3386    /* sanity check on the delta -- top 32 bits are all 0 or all 1 */
3387    delta >>= 32;
3388    vassert(delta == 0LL || delta == -1LL);
3389    VexInvalRange vir = { (HWord)place_to_chain, 7 };
3390    return vir;
3391 }
3392 
3393 
3394 /* NB: what goes on here has to be very closely coordinated with the
3395    emitInstr case for XDirect, above. */
3396 VexInvalRange unchainXDirect_X86 ( VexEndness endness_host,
3397                                    void* place_to_unchain,
3398                                    const void* place_to_jump_to_EXPECTED,
3399                                    const void* disp_cp_chain_me )
3400 {
3401    vassert(endness_host == VexEndnessLE);
3402 
3403    /* What we're expecting to see is:
3404           jmp d32
3405           ud2;
3406        viz
3407           E9 <4 bytes == disp32>
3408           0F 0B
3409    */
3410    UChar* p     = (UChar*)place_to_unchain;
3411    Bool   valid = False;
3412    if (p[0] == 0xE9
3413        && p[5] == 0x0F && p[6]  == 0x0B) {
3414       /* Check the offset is right. */
3415       Int s32 = (Int)read_misaligned_UInt_LE(&p[1]);
3416       if ((UChar*)p + 5 + s32 == place_to_jump_to_EXPECTED) {
3417          valid = True;
3418          if (0)
3419             vex_printf("QQQ unchainXDirect_X86: found valid\n");
3420       }
3421    }
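   /* s32 is the disp32 planted by chainXDirect_X86, so p + 5 + s32
      must recompute exactly the original jump target. */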
3422    vassert(valid);
3423    /* And what we want to change it to is:
3424          movl $disp_cp_chain_me, %edx
3425          call *%edx
3426       viz
3427          BA <4 bytes value == disp_cp_chain_me>
3428          FF D2
3429       So it's the same length (convenient, huh).
3430    */
3431    p[0] = 0xBA;
3432    write_misaligned_UInt_LE(&p[1], (UInt)(Addr)disp_cp_chain_me);
3433    p[5] = 0xFF;
3434    p[6] = 0xD2;
3435    VexInvalRange vir = { (HWord)place_to_unchain, 7 };
3436    return vir;
3437 }
3438 
3439 
3440 /* Patch the counter address into a profile inc point, as previously
3441    created by the Xin_ProfInc case for emit_X86Instr. */
3442 VexInvalRange patchProfInc_X86 ( VexEndness endness_host,
3443                                  void*  place_to_patch,
3444                                  const ULong* location_of_counter )
3445 {
3446    vassert(endness_host == VexEndnessLE);
3447    vassert(sizeof(ULong*) == 4);
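   /* The sizeof check confirms we're on a 32-bit host, so the counter
      address fits in the imm32 fields patched below. */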
3448    UChar* p = (UChar*)place_to_patch;
3449    vassert(p[0] == 0x83);
3450    vassert(p[1] == 0x05);
3451    vassert(p[2] == 0x00);
3452    vassert(p[3] == 0x00);
3453    vassert(p[4] == 0x00);
3454    vassert(p[5] == 0x00);
3455    vassert(p[6] == 0x01);
3456    vassert(p[7] == 0x83);
3457    vassert(p[8] == 0x15);
3458    vassert(p[9] == 0x00);
3459    vassert(p[10] == 0x00);
3460    vassert(p[11] == 0x00);
3461    vassert(p[12] == 0x00);
3462    vassert(p[13] == 0x00);
3463    UInt imm32 = (UInt)(Addr)location_of_counter;
3464    p[2] = imm32 & 0xFF; imm32 >>= 8;
3465    p[3] = imm32 & 0xFF; imm32 >>= 8;
3466    p[4] = imm32 & 0xFF; imm32 >>= 8;
3467    p[5] = imm32 & 0xFF;
3468    imm32 = 4 + (UInt)(Addr)location_of_counter;
3469    p[9]  = imm32 & 0xFF; imm32 >>= 8;
3470    p[10] = imm32 & 0xFF; imm32 >>= 8;
3471    p[11] = imm32 & 0xFF; imm32 >>= 8;
3472    p[12] = imm32 & 0xFF;
3473    VexInvalRange vir = { (HWord)place_to_patch, 14 };
3474    return vir;
3475 }
3476 
3477 
3478 /*---------------------------------------------------------------*/
3479 /*--- end                                     host_x86_defs.c ---*/
3480 /*---------------------------------------------------------------*/
3481