2 /*---------------------------------------------------------------*/
3 /*--- begin host_amd64_defs.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex.h"
38 #include "libvex_trc_values.h"
39
40 #include "main_util.h"
41 #include "host_generic_regs.h"
42 #include "host_amd64_defs.h"
43
44
45 /* --------- Registers. --------- */
46
const RRegUniverse* getRRegUniverse_AMD64 ( void )
{
49 /* The real-register universe is a big constant, so we just want to
50 initialise it once. */
51 static RRegUniverse rRegUniverse_AMD64;
52 static Bool rRegUniverse_AMD64_initted = False;
53
54 /* Handy shorthand, nothing more */
55 RRegUniverse* ru = &rRegUniverse_AMD64;
56
57 /* This isn't thread-safe. Sigh. */
58 if (LIKELY(rRegUniverse_AMD64_initted))
59 return ru;
60
61 RRegUniverse__init(ru);
62
63 /* Add the registers. The initial segment of this array must be
64 those available for allocation by reg-alloc, and those that
65 follow are not available for allocation. */
66 ru->allocable_start[HRcInt64] = ru->size;
67 ru->regs[ru->size++] = hregAMD64_R12();
68 ru->regs[ru->size++] = hregAMD64_R13();
69 ru->regs[ru->size++] = hregAMD64_R14();
70 ru->regs[ru->size++] = hregAMD64_R15();
71 ru->regs[ru->size++] = hregAMD64_RBX();
72 ru->regs[ru->size++] = hregAMD64_RSI();
73 ru->regs[ru->size++] = hregAMD64_RDI();
74 ru->regs[ru->size++] = hregAMD64_R8();
75 ru->regs[ru->size++] = hregAMD64_R9();
76 ru->regs[ru->size++] = hregAMD64_R10();
77 ru->allocable_end[HRcInt64] = ru->size - 1;
78
79 ru->allocable_start[HRcVec128] = ru->size;
80 ru->regs[ru->size++] = hregAMD64_XMM3();
81 ru->regs[ru->size++] = hregAMD64_XMM4();
82 ru->regs[ru->size++] = hregAMD64_XMM5();
83 ru->regs[ru->size++] = hregAMD64_XMM6();
84 ru->regs[ru->size++] = hregAMD64_XMM7();
85 ru->regs[ru->size++] = hregAMD64_XMM8();
86 ru->regs[ru->size++] = hregAMD64_XMM9();
87 ru->regs[ru->size++] = hregAMD64_XMM10();
88 ru->regs[ru->size++] = hregAMD64_XMM11();
89 ru->regs[ru->size++] = hregAMD64_XMM12();
90 ru->allocable_end[HRcVec128] = ru->size - 1;
91 ru->allocable = ru->size;
92
93 /* And other regs, not available to the allocator. */
94 ru->regs[ru->size++] = hregAMD64_RAX();
95 ru->regs[ru->size++] = hregAMD64_RCX();
96 ru->regs[ru->size++] = hregAMD64_RDX();
97 ru->regs[ru->size++] = hregAMD64_RSP();
98 ru->regs[ru->size++] = hregAMD64_RBP();
99 ru->regs[ru->size++] = hregAMD64_R11();
100 ru->regs[ru->size++] = hregAMD64_XMM0();
101 ru->regs[ru->size++] = hregAMD64_XMM1();
102
103 rRegUniverse_AMD64_initted = True;
104
105 RRegUniverse__check_is_sane(ru);
106 return ru;
107 }
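/* Illustrative sketch (not part of the original file): how a client of
   the universe built above might enumerate the allocatable 64-bit
   integer registers.  It uses only fields initialised in
   getRRegUniverse_AMD64 and the printer defined below.

      const RRegUniverse* ru = getRRegUniverse_AMD64();
      for (UInt k = ru->allocable_start[HRcInt64];
           k <= ru->allocable_end[HRcInt64]; k++) {
         ppHRegAMD64(ru->regs[k]);    // %r12, %r13, ..., %r10
         vex_printf("\n");
      }
*/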
108
109
UInt ppHRegAMD64 ( HReg reg )
{
112 Int r;
113 static const HChar* ireg64_names[16]
114 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
115 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
116 /* Be generic for all virtual regs. */
117 if (hregIsVirtual(reg)) {
118 return ppHReg(reg);
119 }
120 /* But specific for real regs. */
121 switch (hregClass(reg)) {
122 case HRcInt64:
123 r = hregEncoding(reg);
124 vassert(r >= 0 && r < 16);
125 return vex_printf("%s", ireg64_names[r]);
126 case HRcVec128:
127 r = hregEncoding(reg);
128 vassert(r >= 0 && r < 16);
129 return vex_printf("%%xmm%d", r);
130 default:
131 vpanic("ppHRegAMD64");
132 }
133 }
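/* For example, ppHRegAMD64(hregAMD64_R12()) prints "%r12"; virtual
   registers never reach the switch above and are printed generically
   by ppHReg instead. */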
134
static UInt ppHRegAMD64_lo32 ( HReg reg )
{
137 Int r;
138 static const HChar* ireg32_names[16]
139 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
140 "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
141 /* Be generic for all virtual regs. */
142 if (hregIsVirtual(reg)) {
143 UInt written = ppHReg(reg);
144 written += vex_printf("d");
145 return written;
146 }
147 /* But specific for real regs. */
148 switch (hregClass(reg)) {
149 case HRcInt64:
150 r = hregEncoding(reg);
151 vassert(r >= 0 && r < 16);
152 return vex_printf("%s", ireg32_names[r]);
153 default:
154 vpanic("ppHRegAMD64_lo32: invalid regclass");
155 }
156 }
157
158
159 /* --------- Condition codes, Intel encoding. --------- */
160
const HChar* showAMD64CondCode ( AMD64CondCode cond )
{
163 switch (cond) {
164 case Acc_O: return "o";
165 case Acc_NO: return "no";
166 case Acc_B: return "b";
167 case Acc_NB: return "nb";
168 case Acc_Z: return "z";
169 case Acc_NZ: return "nz";
170 case Acc_BE: return "be";
171 case Acc_NBE: return "nbe";
172 case Acc_S: return "s";
173 case Acc_NS: return "ns";
174 case Acc_P: return "p";
175 case Acc_NP: return "np";
176 case Acc_L: return "l";
177 case Acc_NL: return "nl";
178 case Acc_LE: return "le";
179 case Acc_NLE: return "nle";
180 case Acc_ALWAYS: return "ALWAYS";
181 default: vpanic("ppAMD64CondCode");
182 }
183 }
184
185
186 /* --------- AMD64AMode: memory address expressions. --------- */
187
AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) {
189 AMD64AMode* am = LibVEX_Alloc_inline(sizeof(AMD64AMode));
190 am->tag = Aam_IR;
191 am->Aam.IR.imm = imm32;
192 am->Aam.IR.reg = reg;
193 return am;
194 }
AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
196 AMD64AMode* am = LibVEX_Alloc_inline(sizeof(AMD64AMode));
197 am->tag = Aam_IRRS;
198 am->Aam.IRRS.imm = imm32;
199 am->Aam.IRRS.base = base;
200 am->Aam.IRRS.index = indEx;
201 am->Aam.IRRS.shift = shift;
202 vassert(shift >= 0 && shift <= 3);
203 return am;
204 }
205
void ppAMD64AMode ( AMD64AMode* am ) {
207 switch (am->tag) {
208 case Aam_IR:
209 if (am->Aam.IR.imm == 0)
210 vex_printf("(");
211 else
212 vex_printf("0x%x(", am->Aam.IR.imm);
213 ppHRegAMD64(am->Aam.IR.reg);
214 vex_printf(")");
215 return;
216 case Aam_IRRS:
217 vex_printf("0x%x(", am->Aam.IRRS.imm);
218 ppHRegAMD64(am->Aam.IRRS.base);
219 vex_printf(",");
220 ppHRegAMD64(am->Aam.IRRS.index);
221 vex_printf(",%d)", 1 << am->Aam.IRRS.shift);
222 return;
223 default:
224 vpanic("ppAMD64AMode");
225 }
226 }
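/* Illustrative examples (not from the original source) of the two
   addressing forms.  An IR amode denotes imm+reg; an IRRS amode
   denotes imm + base + (index << shift).  They print as shown:

      AMD64AMode_IR(0x18, hregAMD64_RBP())
         -> 0x18(%rbp)
      AMD64AMode_IRRS(0, hregAMD64_RAX(), hregAMD64_RCX(), 2)
         -> 0x0(%rax,%rcx,4)
*/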
227
static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am ) {
229 switch (am->tag) {
230 case Aam_IR:
231 addHRegUse(u, HRmRead, am->Aam.IR.reg);
232 return;
233 case Aam_IRRS:
234 addHRegUse(u, HRmRead, am->Aam.IRRS.base);
235 addHRegUse(u, HRmRead, am->Aam.IRRS.index);
236 return;
237 default:
238 vpanic("addRegUsage_AMD64AMode");
239 }
240 }
241
static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am ) {
243 switch (am->tag) {
244 case Aam_IR:
245 am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg);
246 return;
247 case Aam_IRRS:
248 am->Aam.IRRS.base = lookupHRegRemap(m, am->Aam.IRRS.base);
249 am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index);
250 return;
251 default:
252 vpanic("mapRegs_AMD64AMode");
253 }
254 }
255
256 /* --------- Operand, which can be reg, immediate or memory. --------- */
257
AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) {
259 AMD64RMI* op = LibVEX_Alloc_inline(sizeof(AMD64RMI));
260 op->tag = Armi_Imm;
261 op->Armi.Imm.imm32 = imm32;
262 return op;
263 }
AMD64RMI* AMD64RMI_Reg ( HReg reg ) {
265 AMD64RMI* op = LibVEX_Alloc_inline(sizeof(AMD64RMI));
266 op->tag = Armi_Reg;
267 op->Armi.Reg.reg = reg;
268 return op;
269 }
AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) {
271 AMD64RMI* op = LibVEX_Alloc_inline(sizeof(AMD64RMI));
272 op->tag = Armi_Mem;
273 op->Armi.Mem.am = am;
274 return op;
275 }
276
static void ppAMD64RMI_wrk ( AMD64RMI* op, Bool lo32 ) {
278 switch (op->tag) {
279 case Armi_Imm:
280 vex_printf("$0x%x", op->Armi.Imm.imm32);
281 return;
282 case Armi_Reg:
283 if (lo32)
284 ppHRegAMD64_lo32(op->Armi.Reg.reg);
285 else
286 ppHRegAMD64(op->Armi.Reg.reg);
287 return;
288 case Armi_Mem:
289 ppAMD64AMode(op->Armi.Mem.am);
290 return;
291 default:
292 vpanic("ppAMD64RMI");
293 }
294 }
void ppAMD64RMI ( AMD64RMI* op ) {
296 ppAMD64RMI_wrk(op, False/*!lo32*/);
297 }
void ppAMD64RMI_lo32 ( AMD64RMI* op ) {
299 ppAMD64RMI_wrk(op, True/*lo32*/);
300 }
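/* Illustrative examples (not from the original source) of the three
   RMI operand forms and how ppAMD64RMI renders them:

      AMD64RMI_Imm(0x2A)                               -> $0x2a
      AMD64RMI_Reg(hregAMD64_RBX())                    -> %rbx
      AMD64RMI_Mem(AMD64AMode_IR(8, hregAMD64_RSP()))  -> 0x8(%rsp)
*/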
301
302 /* An AMD64RMI can only be used in a "read" context (what would it mean
303 to write or modify a literal?) and so we enumerate its registers
304 accordingly. */
static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) {
306 switch (op->tag) {
307 case Armi_Imm:
308 return;
309 case Armi_Reg:
310 addHRegUse(u, HRmRead, op->Armi.Reg.reg);
311 return;
312 case Armi_Mem:
313 addRegUsage_AMD64AMode(u, op->Armi.Mem.am);
314 return;
315 default:
316 vpanic("addRegUsage_AMD64RMI");
317 }
318 }
319
static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op ) {
321 switch (op->tag) {
322 case Armi_Imm:
323 return;
324 case Armi_Reg:
325 op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg);
326 return;
327 case Armi_Mem:
328 mapRegs_AMD64AMode(m, op->Armi.Mem.am);
329 return;
330 default:
331 vpanic("mapRegs_AMD64RMI");
332 }
333 }
334
335
336 /* --------- Operand, which can be reg or immediate only. --------- */
337
AMD64RI* AMD64RI_Imm ( UInt imm32 ) {
339 AMD64RI* op = LibVEX_Alloc_inline(sizeof(AMD64RI));
340 op->tag = Ari_Imm;
341 op->Ari.Imm.imm32 = imm32;
342 return op;
343 }
AMD64RI* AMD64RI_Reg ( HReg reg ) {
345 AMD64RI* op = LibVEX_Alloc_inline(sizeof(AMD64RI));
346 op->tag = Ari_Reg;
347 op->Ari.Reg.reg = reg;
348 return op;
349 }
350
void ppAMD64RI ( AMD64RI* op ) {
352 switch (op->tag) {
353 case Ari_Imm:
354 vex_printf("$0x%x", op->Ari.Imm.imm32);
355 return;
356 case Ari_Reg:
357 ppHRegAMD64(op->Ari.Reg.reg);
358 return;
359 default:
360 vpanic("ppAMD64RI");
361 }
362 }
363
364 /* An AMD64RI can only be used in a "read" context (what would it mean
365 to write or modify a literal?) and so we enumerate its registers
366 accordingly. */
static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) {
368 switch (op->tag) {
369 case Ari_Imm:
370 return;
371 case Ari_Reg:
372 addHRegUse(u, HRmRead, op->Ari.Reg.reg);
373 return;
374 default:
375 vpanic("addRegUsage_AMD64RI");
376 }
377 }
378
static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op ) {
380 switch (op->tag) {
381 case Ari_Imm:
382 return;
383 case Ari_Reg:
384 op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg);
385 return;
386 default:
387 vpanic("mapRegs_AMD64RI");
388 }
389 }
390
391
392 /* --------- Operand, which can be reg or memory only. --------- */
393
AMD64RM* AMD64RM_Reg ( HReg reg ) {
395 AMD64RM* op = LibVEX_Alloc_inline(sizeof(AMD64RM));
396 op->tag = Arm_Reg;
397 op->Arm.Reg.reg = reg;
398 return op;
399 }
AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) {
401 AMD64RM* op = LibVEX_Alloc_inline(sizeof(AMD64RM));
402 op->tag = Arm_Mem;
403 op->Arm.Mem.am = am;
404 return op;
405 }
406
void ppAMD64RM ( AMD64RM* op ) {
408 switch (op->tag) {
409 case Arm_Mem:
410 ppAMD64AMode(op->Arm.Mem.am);
411 return;
412 case Arm_Reg:
413 ppHRegAMD64(op->Arm.Reg.reg);
414 return;
415 default:
416 vpanic("ppAMD64RM");
417 }
418 }
419
420 /* Because an AMD64RM can be both a source or destination operand, we
421 have to supply a mode -- pertaining to the operand as a whole --
422 indicating how it's being used. */
static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) {
424 switch (op->tag) {
425 case Arm_Mem:
426 /* Memory is read, written or modified. So we just want to
427 know the regs read by the amode. */
428 addRegUsage_AMD64AMode(u, op->Arm.Mem.am);
429 return;
430 case Arm_Reg:
431 /* reg is read, written or modified. Add it in the
432 appropriate way. */
433 addHRegUse(u, mode, op->Arm.Reg.reg);
434 return;
435 default:
436 vpanic("addRegUsage_AMD64RM");
437 }
438 }
439
static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op )
{
442 switch (op->tag) {
443 case Arm_Mem:
444 mapRegs_AMD64AMode(m, op->Arm.Mem.am);
445 return;
446 case Arm_Reg:
447 op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg);
448 return;
449 default:
450 vpanic("mapRegs_AMD64RM");
451 }
452 }
453
454
455 /* --------- Instructions. --------- */
456
static const HChar* showAMD64ScalarSz ( Int sz ) {
458 switch (sz) {
459 case 2: return "w";
460 case 4: return "l";
461 case 8: return "q";
462 default: vpanic("showAMD64ScalarSz");
463 }
464 }
465
const HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) {
467 switch (op) {
468 case Aun_NOT: return "not";
469 case Aun_NEG: return "neg";
470 default: vpanic("showAMD64UnaryOp");
471 }
472 }
473
const HChar* showAMD64AluOp ( AMD64AluOp op ) {
475 switch (op) {
476 case Aalu_MOV: return "mov";
477 case Aalu_CMP: return "cmp";
478 case Aalu_ADD: return "add";
479 case Aalu_SUB: return "sub";
480 case Aalu_ADC: return "adc";
481 case Aalu_SBB: return "sbb";
482 case Aalu_AND: return "and";
483 case Aalu_OR: return "or";
484 case Aalu_XOR: return "xor";
485 case Aalu_MUL: return "imul";
486 default: vpanic("showAMD64AluOp");
487 }
488 }
489
const HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) {
491 switch (op) {
492 case Ash_SHL: return "shl";
493 case Ash_SHR: return "shr";
494 case Ash_SAR: return "sar";
495 default: vpanic("showAMD64ShiftOp");
496 }
497 }
498
const HChar* showA87FpOp ( A87FpOp op ) {
500 switch (op) {
501 case Afp_SCALE: return "scale";
502 case Afp_ATAN: return "atan";
503 case Afp_YL2X: return "yl2x";
504 case Afp_YL2XP1: return "yl2xp1";
505 case Afp_PREM: return "prem";
506 case Afp_PREM1: return "prem1";
507 case Afp_SQRT: return "sqrt";
508 case Afp_SIN: return "sin";
509 case Afp_COS: return "cos";
510 case Afp_TAN: return "tan";
511 case Afp_ROUND: return "round";
512 case Afp_2XM1: return "2xm1";
513 default: vpanic("showA87FpOp");
514 }
515 }
516
const HChar* showAMD64SseOp ( AMD64SseOp op ) {
518 switch (op) {
519 case Asse_MOV: return "movups";
520 case Asse_ADDF: return "add";
521 case Asse_SUBF: return "sub";
522 case Asse_MULF: return "mul";
523 case Asse_DIVF: return "div";
524 case Asse_MAXF: return "max";
525 case Asse_MINF: return "min";
526 case Asse_CMPEQF: return "cmpFeq";
527 case Asse_CMPLTF: return "cmpFlt";
528 case Asse_CMPLEF: return "cmpFle";
529 case Asse_CMPUNF: return "cmpFun";
530 case Asse_RCPF: return "rcp";
531 case Asse_RSQRTF: return "rsqrt";
532 case Asse_SQRTF: return "sqrt";
533 case Asse_I2F: return "cvtdq2ps.";
534 case Asse_F2I: return "cvtps2dq.";
535 case Asse_AND: return "and";
536 case Asse_OR: return "or";
537 case Asse_XOR: return "xor";
538 case Asse_ANDN: return "andn";
539 case Asse_ADD8: return "paddb";
540 case Asse_ADD16: return "paddw";
541 case Asse_ADD32: return "paddd";
542 case Asse_ADD64: return "paddq";
543 case Asse_QADD8U: return "paddusb";
544 case Asse_QADD16U: return "paddusw";
545 case Asse_QADD8S: return "paddsb";
546 case Asse_QADD16S: return "paddsw";
547 case Asse_SUB8: return "psubb";
548 case Asse_SUB16: return "psubw";
549 case Asse_SUB32: return "psubd";
550 case Asse_SUB64: return "psubq";
551 case Asse_QSUB8U: return "psubusb";
552 case Asse_QSUB16U: return "psubusw";
553 case Asse_QSUB8S: return "psubsb";
554 case Asse_QSUB16S: return "psubsw";
555 case Asse_MUL16: return "pmullw";
556 case Asse_MULHI16U: return "pmulhuw";
557 case Asse_MULHI16S: return "pmulhw";
558 case Asse_AVG8U: return "pavgb";
559 case Asse_AVG16U: return "pavgw";
560 case Asse_MAX16S: return "pmaxw";
561 case Asse_MAX8U: return "pmaxub";
562 case Asse_MIN16S: return "pminw";
563 case Asse_MIN8U: return "pminub";
564 case Asse_CMPEQ8: return "pcmpeqb";
565 case Asse_CMPEQ16: return "pcmpeqw";
566 case Asse_CMPEQ32: return "pcmpeqd";
567 case Asse_CMPGT8S: return "pcmpgtb";
568 case Asse_CMPGT16S: return "pcmpgtw";
569 case Asse_CMPGT32S: return "pcmpgtd";
570 case Asse_SHL16: return "psllw";
571 case Asse_SHL32: return "pslld";
572 case Asse_SHL64: return "psllq";
573 case Asse_SHL128: return "pslldq";
574 case Asse_SHR16: return "psrlw";
575 case Asse_SHR32: return "psrld";
576 case Asse_SHR64: return "psrlq";
577 case Asse_SHR128: return "psrldq";
578 case Asse_SAR16: return "psraw";
579 case Asse_SAR32: return "psrad";
580 case Asse_PACKSSD: return "packssdw";
581 case Asse_PACKSSW: return "packsswb";
582 case Asse_PACKUSW: return "packuswb";
583 case Asse_UNPCKHB: return "punpckhb";
584 case Asse_UNPCKHW: return "punpckhw";
585 case Asse_UNPCKHD: return "punpckhd";
586 case Asse_UNPCKHQ: return "punpckhq";
587 case Asse_UNPCKLB: return "punpcklb";
588 case Asse_UNPCKLW: return "punpcklw";
589 case Asse_UNPCKLD: return "punpckld";
590 case Asse_UNPCKLQ: return "punpcklq";
591 case Asse_PSHUFB: return "pshufb";
592 default: vpanic("showAMD64SseOp");
593 }
594 }
595
AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) {
597 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
598 i->tag = Ain_Imm64;
599 i->Ain.Imm64.imm64 = imm64;
600 i->Ain.Imm64.dst = dst;
601 return i;
602 }
AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
604 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
605 i->tag = Ain_Alu64R;
606 i->Ain.Alu64R.op = op;
607 i->Ain.Alu64R.src = src;
608 i->Ain.Alu64R.dst = dst;
609 return i;
610 }
AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) {
612 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
613 i->tag = Ain_Alu64M;
614 i->Ain.Alu64M.op = op;
615 i->Ain.Alu64M.src = src;
616 i->Ain.Alu64M.dst = dst;
617 vassert(op != Aalu_MUL);
618 return i;
619 }
AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
621 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
622 i->tag = Ain_Sh64;
623 i->Ain.Sh64.op = op;
624 i->Ain.Sh64.src = src;
625 i->Ain.Sh64.dst = dst;
626 return i;
627 }
AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
629 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
630 i->tag = Ain_Test64;
631 i->Ain.Test64.imm32 = imm32;
632 i->Ain.Test64.dst = dst;
633 return i;
634 }
AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) {
636 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
637 i->tag = Ain_Unary64;
638 i->Ain.Unary64.op = op;
639 i->Ain.Unary64.dst = dst;
640 return i;
641 }
AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) {
643 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
644 i->tag = Ain_Lea64;
645 i->Ain.Lea64.am = am;
646 i->Ain.Lea64.dst = dst;
647 return i;
648 }
AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
650 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
651 i->tag = Ain_Alu32R;
652 i->Ain.Alu32R.op = op;
653 i->Ain.Alu32R.src = src;
654 i->Ain.Alu32R.dst = dst;
655 switch (op) {
656 case Aalu_ADD: case Aalu_SUB: case Aalu_CMP:
657 case Aalu_AND: case Aalu_OR: case Aalu_XOR: break;
658 default: vassert(0);
659 }
660 return i;
661 }
AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) {
663 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
664 i->tag = Ain_MulL;
665 i->Ain.MulL.syned = syned;
666 i->Ain.MulL.src = src;
667 return i;
668 }
AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) {
670 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
671 i->tag = Ain_Div;
672 i->Ain.Div.syned = syned;
673 i->Ain.Div.sz = sz;
674 i->Ain.Div.src = src;
675 vassert(sz == 4 || sz == 8);
676 return i;
677 }
AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) {
679 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
680 i->tag = Ain_Push;
681 i->Ain.Push.src = src;
682 return i;
683 }
AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms,
                              RetLoc rloc ) {
686 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
687 i->tag = Ain_Call;
688 i->Ain.Call.cond = cond;
689 i->Ain.Call.target = target;
690 i->Ain.Call.regparms = regparms;
691 i->Ain.Call.rloc = rloc;
692 vassert(regparms >= 0 && regparms <= 6);
693 vassert(is_sane_RetLoc(rloc));
694 return i;
695 }
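/* Note: the regparms count (0..6) corresponds to the System V AMD64
   integer argument registers; getRegUsage_AMD64Instr below marks
   %rdi, %rsi, %rdx, %rcx, %r8 and %r9 as read accordingly. */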
696
AMD64Instr* AMD64Instr_XDirect ( Addr64 dstGA, AMD64AMode* amRIP,
                                 AMD64CondCode cond, Bool toFastEP ) {
699 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
700 i->tag = Ain_XDirect;
701 i->Ain.XDirect.dstGA = dstGA;
702 i->Ain.XDirect.amRIP = amRIP;
703 i->Ain.XDirect.cond = cond;
704 i->Ain.XDirect.toFastEP = toFastEP;
705 return i;
706 }
AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP,
                                AMD64CondCode cond ) {
709 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
710 i->tag = Ain_XIndir;
711 i->Ain.XIndir.dstGA = dstGA;
712 i->Ain.XIndir.amRIP = amRIP;
713 i->Ain.XIndir.cond = cond;
714 return i;
715 }
AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP,
                                   AMD64CondCode cond, IRJumpKind jk ) {
718 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
719 i->tag = Ain_XAssisted;
720 i->Ain.XAssisted.dstGA = dstGA;
721 i->Ain.XAssisted.amRIP = amRIP;
722 i->Ain.XAssisted.cond = cond;
723 i->Ain.XAssisted.jk = jk;
724 return i;
725 }
726
AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, HReg src, HReg dst ) {
728 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
729 i->tag = Ain_CMov64;
730 i->Ain.CMov64.cond = cond;
731 i->Ain.CMov64.src = src;
732 i->Ain.CMov64.dst = dst;
733 vassert(cond != Acc_ALWAYS);
734 return i;
735 }
AMD64Instr* AMD64Instr_CLoad ( AMD64CondCode cond, UChar szB,
                               AMD64AMode* addr, HReg dst ) {
738 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
739 i->tag = Ain_CLoad;
740 i->Ain.CLoad.cond = cond;
741 i->Ain.CLoad.szB = szB;
742 i->Ain.CLoad.addr = addr;
743 i->Ain.CLoad.dst = dst;
744 vassert(cond != Acc_ALWAYS && (szB == 4 || szB == 8));
745 return i;
746 }
AMD64Instr* AMD64Instr_CStore ( AMD64CondCode cond, UChar szB,
                                HReg src, AMD64AMode* addr ) {
749 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
750 i->tag = Ain_CStore;
751 i->Ain.CStore.cond = cond;
752 i->Ain.CStore.szB = szB;
753 i->Ain.CStore.src = src;
754 i->Ain.CStore.addr = addr;
755 vassert(cond != Acc_ALWAYS && (szB == 4 || szB == 8));
756 return i;
757 }
AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
759 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
760 i->tag = Ain_MovxLQ;
761 i->Ain.MovxLQ.syned = syned;
762 i->Ain.MovxLQ.src = src;
763 i->Ain.MovxLQ.dst = dst;
764 return i;
765 }
AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
                                AMD64AMode* src, HReg dst ) {
768 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
769 i->tag = Ain_LoadEX;
770 i->Ain.LoadEX.szSmall = szSmall;
771 i->Ain.LoadEX.syned = syned;
772 i->Ain.LoadEX.src = src;
773 i->Ain.LoadEX.dst = dst;
774 vassert(szSmall == 1 || szSmall == 2 || szSmall == 4);
775 return i;
776 }
AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) {
778 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
779 i->tag = Ain_Store;
780 i->Ain.Store.sz = sz;
781 i->Ain.Store.src = src;
782 i->Ain.Store.dst = dst;
783 vassert(sz == 1 || sz == 2 || sz == 4);
784 return i;
785 }
AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) {
787 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
788 i->tag = Ain_Set64;
789 i->Ain.Set64.cond = cond;
790 i->Ain.Set64.dst = dst;
791 return i;
792 }
AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) {
794 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
795 i->tag = Ain_Bsfr64;
796 i->Ain.Bsfr64.isFwds = isFwds;
797 i->Ain.Bsfr64.src = src;
798 i->Ain.Bsfr64.dst = dst;
799 return i;
800 }
AMD64Instr* AMD64Instr_MFence ( void ) {
802 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
803 i->tag = Ain_MFence;
804 return i;
805 }
AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) {
807 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
808 i->tag = Ain_ACAS;
809 i->Ain.ACAS.addr = addr;
810 i->Ain.ACAS.sz = sz;
811 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
812 return i;
813 }
AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) {
815 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
816 i->tag = Ain_DACAS;
817 i->Ain.DACAS.addr = addr;
818 i->Ain.DACAS.sz = sz;
819 vassert(sz == 8 || sz == 4);
820 return i;
821 }
822
AMD64Instr* AMD64Instr_A87Free ( Int nregs )
{
825 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
826 i->tag = Ain_A87Free;
827 i->Ain.A87Free.nregs = nregs;
828 vassert(nregs >= 1 && nregs <= 7);
829 return i;
830 }
AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB )
{
833 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
834 i->tag = Ain_A87PushPop;
835 i->Ain.A87PushPop.addr = addr;
836 i->Ain.A87PushPop.isPush = isPush;
837 i->Ain.A87PushPop.szB = szB;
838 vassert(szB == 8 || szB == 4);
839 return i;
840 }
AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
{
843 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
844 i->tag = Ain_A87FpOp;
845 i->Ain.A87FpOp.op = op;
846 return i;
847 }
AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr )
{
850 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
851 i->tag = Ain_A87LdCW;
852 i->Ain.A87LdCW.addr = addr;
853 return i;
854 }
AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr )
{
857 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
858 i->tag = Ain_A87StSW;
859 i->Ain.A87StSW.addr = addr;
860 return i;
861 }
AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
863 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
864 i->tag = Ain_LdMXCSR;
865 i->Ain.LdMXCSR.addr = addr;
866 return i;
867 }
AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) {
869 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
870 i->tag = Ain_SseUComIS;
871 i->Ain.SseUComIS.sz = toUChar(sz);
872 i->Ain.SseUComIS.srcL = srcL;
873 i->Ain.SseUComIS.srcR = srcR;
874 i->Ain.SseUComIS.dst = dst;
875 vassert(sz == 4 || sz == 8);
876 return i;
877 }
AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) {
879 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
880 i->tag = Ain_SseSI2SF;
881 i->Ain.SseSI2SF.szS = toUChar(szS);
882 i->Ain.SseSI2SF.szD = toUChar(szD);
883 i->Ain.SseSI2SF.src = src;
884 i->Ain.SseSI2SF.dst = dst;
885 vassert(szS == 4 || szS == 8);
886 vassert(szD == 4 || szD == 8);
887 return i;
888 }
AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) {
890 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
891 i->tag = Ain_SseSF2SI;
892 i->Ain.SseSF2SI.szS = toUChar(szS);
893 i->Ain.SseSF2SI.szD = toUChar(szD);
894 i->Ain.SseSF2SI.src = src;
895 i->Ain.SseSF2SI.dst = dst;
896 vassert(szS == 4 || szS == 8);
897 vassert(szD == 4 || szD == 8);
898 return i;
899 }
AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst )
{
902 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
903 i->tag = Ain_SseSDSS;
904 i->Ain.SseSDSS.from64 = from64;
905 i->Ain.SseSDSS.src = src;
906 i->Ain.SseSDSS.dst = dst;
907 return i;
908 }
AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz,
                                 HReg reg, AMD64AMode* addr ) {
911 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
912 i->tag = Ain_SseLdSt;
913 i->Ain.SseLdSt.isLoad = isLoad;
914 i->Ain.SseLdSt.sz = toUChar(sz);
915 i->Ain.SseLdSt.reg = reg;
916 i->Ain.SseLdSt.addr = addr;
917 vassert(sz == 4 || sz == 8 || sz == 16);
918 return i;
919 }
AMD64Instr* AMD64Instr_SseCStore ( AMD64CondCode cond,
                                   HReg src, AMD64AMode* addr )
{
923 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
924 i->tag = Ain_SseCStore;
925 i->Ain.SseCStore.cond = cond;
926 i->Ain.SseCStore.src = src;
927 i->Ain.SseCStore.addr = addr;
928 vassert(cond != Acc_ALWAYS);
929 return i;
930 }
AMD64Instr* AMD64Instr_SseCLoad ( AMD64CondCode cond,
                                  AMD64AMode* addr, HReg dst )
{
934 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
935 i->tag = Ain_SseCLoad;
936 i->Ain.SseCLoad.cond = cond;
937 i->Ain.SseCLoad.addr = addr;
938 i->Ain.SseCLoad.dst = dst;
939 vassert(cond != Acc_ALWAYS);
940 return i;
941 }
AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr )
{
944 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
945 i->tag = Ain_SseLdzLO;
946 i->Ain.SseLdzLO.sz = sz;
947 i->Ain.SseLdzLO.reg = reg;
948 i->Ain.SseLdzLO.addr = addr;
949 vassert(sz == 4 || sz == 8);
950 return i;
951 }
AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) {
953 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
954 i->tag = Ain_Sse32Fx4;
955 i->Ain.Sse32Fx4.op = op;
956 i->Ain.Sse32Fx4.src = src;
957 i->Ain.Sse32Fx4.dst = dst;
958 vassert(op != Asse_MOV);
959 return i;
960 }
AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) {
962 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
963 i->tag = Ain_Sse32FLo;
964 i->Ain.Sse32FLo.op = op;
965 i->Ain.Sse32FLo.src = src;
966 i->Ain.Sse32FLo.dst = dst;
967 vassert(op != Asse_MOV);
968 return i;
969 }
AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) {
971 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
972 i->tag = Ain_Sse64Fx2;
973 i->Ain.Sse64Fx2.op = op;
974 i->Ain.Sse64Fx2.src = src;
975 i->Ain.Sse64Fx2.dst = dst;
976 vassert(op != Asse_MOV);
977 return i;
978 }
AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) {
980 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
981 i->tag = Ain_Sse64FLo;
982 i->Ain.Sse64FLo.op = op;
983 i->Ain.Sse64FLo.src = src;
984 i->Ain.Sse64FLo.dst = dst;
985 vassert(op != Asse_MOV);
986 return i;
987 }
AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) {
989 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
990 i->tag = Ain_SseReRg;
991 i->Ain.SseReRg.op = op;
992 i->Ain.SseReRg.src = re;
993 i->Ain.SseReRg.dst = rg;
994 return i;
995 }
AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
997 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
998 i->tag = Ain_SseCMov;
999 i->Ain.SseCMov.cond = cond;
1000 i->Ain.SseCMov.src = src;
1001 i->Ain.SseCMov.dst = dst;
1002 vassert(cond != Acc_ALWAYS);
1003 return i;
1004 }
AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) {
1006 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1007 i->tag = Ain_SseShuf;
1008 i->Ain.SseShuf.order = order;
1009 i->Ain.SseShuf.src = src;
1010 i->Ain.SseShuf.dst = dst;
1011 vassert(order >= 0 && order <= 0xFF);
1012 return i;
1013 }
AMD64Instr* AMD64Instr_SseShiftN ( AMD64SseOp op,
                                   UInt shiftBits, HReg dst ) {
1016 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1017 i->tag = Ain_SseShiftN;
1018 i->Ain.SseShiftN.op = op;
1019 i->Ain.SseShiftN.shiftBits = shiftBits;
1020 i->Ain.SseShiftN.dst = dst;
1021 return i;
1022 }
AMD64Instr* AMD64Instr_SseMOVQ ( HReg gpr, HReg xmm, Bool toXMM ) {
1024 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1025 i->tag = Ain_SseMOVQ;
1026 i->Ain.SseMOVQ.gpr = gpr;
1027 i->Ain.SseMOVQ.xmm = xmm;
1028 i->Ain.SseMOVQ.toXMM = toXMM;
1029 vassert(hregClass(gpr) == HRcInt64);
1030 vassert(hregClass(xmm) == HRcVec128);
1031 return i;
1032 }
1033 //uu AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad,
1034 //uu HReg reg, AMD64AMode* addr ) {
1035 //uu AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1036 //uu i->tag = Ain_AvxLdSt;
1037 //uu i->Ain.AvxLdSt.isLoad = isLoad;
1038 //uu i->Ain.AvxLdSt.reg = reg;
1039 //uu i->Ain.AvxLdSt.addr = addr;
1040 //uu return i;
1041 //uu }
1042 //uu AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp op, HReg re, HReg rg ) {
1043 //uu AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1044 //uu i->tag = Ain_AvxReRg;
1045 //uu i->Ain.AvxReRg.op = op;
1046 //uu i->Ain.AvxReRg.src = re;
1047 //uu i->Ain.AvxReRg.dst = rg;
1048 //uu return i;
1049 //uu }
AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter,
                                 AMD64AMode* amFailAddr ) {
1052 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1053 i->tag = Ain_EvCheck;
1054 i->Ain.EvCheck.amCounter = amCounter;
1055 i->Ain.EvCheck.amFailAddr = amFailAddr;
1056 return i;
1057 }
AMD64Instr* AMD64Instr_ProfInc ( void ) {
1059 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1060 i->tag = Ain_ProfInc;
1061 return i;
1062 }
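/* Minimal usage sketch (illustrative, not part of the original file):
   constructing an instruction with the helpers above and printing it
   with ppAMD64Instr below.

      AMD64Instr* ins
         = AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(1), hregAMD64_RBX());
      ppAMD64Instr(ins, True/*mode64*/);   // prints: addq $0x1,%rbx
*/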
1063
void ppAMD64Instr ( const AMD64Instr* i, Bool mode64 )
{
1066 vassert(mode64 == True);
1067 switch (i->tag) {
1068 case Ain_Imm64:
1069 vex_printf("movabsq $0x%llx,", i->Ain.Imm64.imm64);
1070 ppHRegAMD64(i->Ain.Imm64.dst);
1071 return;
1072 case Ain_Alu64R:
1073 vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64R.op));
1074 ppAMD64RMI(i->Ain.Alu64R.src);
1075 vex_printf(",");
1076 ppHRegAMD64(i->Ain.Alu64R.dst);
1077 return;
1078 case Ain_Alu64M:
1079 vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64M.op));
1080 ppAMD64RI(i->Ain.Alu64M.src);
1081 vex_printf(",");
1082 ppAMD64AMode(i->Ain.Alu64M.dst);
1083 return;
1084 case Ain_Sh64:
1085 vex_printf("%sq ", showAMD64ShiftOp(i->Ain.Sh64.op));
1086 if (i->Ain.Sh64.src == 0)
1087 vex_printf("%%cl,");
1088 else
1089 vex_printf("$%d,", (Int)i->Ain.Sh64.src);
1090 ppHRegAMD64(i->Ain.Sh64.dst);
1091 return;
1092 case Ain_Test64:
1093 vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32);
1094 ppHRegAMD64(i->Ain.Test64.dst);
1095 return;
1096 case Ain_Unary64:
1097 vex_printf("%sq ", showAMD64UnaryOp(i->Ain.Unary64.op));
1098 ppHRegAMD64(i->Ain.Unary64.dst);
1099 return;
1100 case Ain_Lea64:
1101 vex_printf("leaq ");
1102 ppAMD64AMode(i->Ain.Lea64.am);
1103 vex_printf(",");
1104 ppHRegAMD64(i->Ain.Lea64.dst);
1105 return;
1106 case Ain_Alu32R:
1107 vex_printf("%sl ", showAMD64AluOp(i->Ain.Alu32R.op));
1108 ppAMD64RMI_lo32(i->Ain.Alu32R.src);
1109 vex_printf(",");
1110 ppHRegAMD64_lo32(i->Ain.Alu32R.dst);
1111 return;
1112 case Ain_MulL:
1113 vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u');
1114 ppAMD64RM(i->Ain.MulL.src);
1115 return;
1116 case Ain_Div:
1117 vex_printf("%cdiv%s ",
1118 i->Ain.Div.syned ? 's' : 'u',
1119 showAMD64ScalarSz(i->Ain.Div.sz));
1120 ppAMD64RM(i->Ain.Div.src);
1121 return;
1122 case Ain_Push:
1123 vex_printf("pushq ");
1124 ppAMD64RMI(i->Ain.Push.src);
1125 return;
1126 case Ain_Call:
1127 vex_printf("call%s[%d,",
1128 i->Ain.Call.cond==Acc_ALWAYS
1129 ? "" : showAMD64CondCode(i->Ain.Call.cond),
1130 i->Ain.Call.regparms );
1131 ppRetLoc(i->Ain.Call.rloc);
1132 vex_printf("] 0x%llx", i->Ain.Call.target);
1133 break;
1134
1135 case Ain_XDirect:
1136 vex_printf("(xDirect) ");
1137 vex_printf("if (%%rflags.%s) { ",
1138 showAMD64CondCode(i->Ain.XDirect.cond));
1139 vex_printf("movabsq $0x%llx,%%r11; ", i->Ain.XDirect.dstGA);
1140 vex_printf("movq %%r11,");
1141 ppAMD64AMode(i->Ain.XDirect.amRIP);
1142 vex_printf("; ");
1143 vex_printf("movabsq $disp_cp_chain_me_to_%sEP,%%r11; call *%%r11 }",
1144 i->Ain.XDirect.toFastEP ? "fast" : "slow");
1145 return;
1146 case Ain_XIndir:
1147 vex_printf("(xIndir) ");
1148 vex_printf("if (%%rflags.%s) { ",
1149 showAMD64CondCode(i->Ain.XIndir.cond));
1150 vex_printf("movq ");
1151 ppHRegAMD64(i->Ain.XIndir.dstGA);
1152 vex_printf(",");
1153 ppAMD64AMode(i->Ain.XIndir.amRIP);
1154 vex_printf("; movabsq $disp_indir,%%r11; jmp *%%r11 }");
1155 return;
1156 case Ain_XAssisted:
1157 vex_printf("(xAssisted) ");
1158 vex_printf("if (%%rflags.%s) { ",
1159 showAMD64CondCode(i->Ain.XAssisted.cond));
1160 vex_printf("movq ");
1161 ppHRegAMD64(i->Ain.XAssisted.dstGA);
1162 vex_printf(",");
1163 ppAMD64AMode(i->Ain.XAssisted.amRIP);
1164 vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%rbp",
1165 (Int)i->Ain.XAssisted.jk);
1166 vex_printf("; movabsq $disp_assisted,%%r11; jmp *%%r11 }");
1167 return;
1168
1169 case Ain_CMov64:
1170 vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond));
1171 ppHRegAMD64(i->Ain.CMov64.src);
1172 vex_printf(",");
1173 ppHRegAMD64(i->Ain.CMov64.dst);
1174 return;
1175 case Ain_CLoad:
1176 vex_printf("if (%%rflags.%s) { ",
1177 showAMD64CondCode(i->Ain.CLoad.cond));
1178 vex_printf("mov%c ", i->Ain.CLoad.szB == 4 ? 'l' : 'q');
1179 ppAMD64AMode(i->Ain.CLoad.addr);
1180 vex_printf(", ");
1181 (i->Ain.CLoad.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1182 (i->Ain.CLoad.dst);
1183 vex_printf(" }");
1184 return;
1185 case Ain_CStore:
1186 vex_printf("if (%%rflags.%s) { ",
1187 showAMD64CondCode(i->Ain.CStore.cond));
1188 vex_printf("mov%c ", i->Ain.CStore.szB == 4 ? 'l' : 'q');
1189 (i->Ain.CStore.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1190 (i->Ain.CStore.src);
1191 vex_printf(", ");
1192 ppAMD64AMode(i->Ain.CStore.addr);
1193 vex_printf(" }");
1194 return;
1195
1196 case Ain_MovxLQ:
1197 vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
1198 ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
1199 vex_printf(",");
1200 ppHRegAMD64(i->Ain.MovxLQ.dst);
1201 return;
1202 case Ain_LoadEX:
1203 if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) {
1204 vex_printf("movl ");
1205 ppAMD64AMode(i->Ain.LoadEX.src);
1206 vex_printf(",");
1207 ppHRegAMD64_lo32(i->Ain.LoadEX.dst);
1208 } else {
1209 vex_printf("mov%c%cq ",
1210 i->Ain.LoadEX.syned ? 's' : 'z',
1211 i->Ain.LoadEX.szSmall==1
1212 ? 'b'
1213 : (i->Ain.LoadEX.szSmall==2 ? 'w' : 'l'));
1214 ppAMD64AMode(i->Ain.LoadEX.src);
1215 vex_printf(",");
1216 ppHRegAMD64(i->Ain.LoadEX.dst);
1217 }
1218 return;
1219 case Ain_Store:
1220 vex_printf("mov%c ", i->Ain.Store.sz==1 ? 'b'
1221 : (i->Ain.Store.sz==2 ? 'w' : 'l'));
1222 ppHRegAMD64(i->Ain.Store.src);
1223 vex_printf(",");
1224 ppAMD64AMode(i->Ain.Store.dst);
1225 return;
1226 case Ain_Set64:
1227 vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond));
1228 ppHRegAMD64(i->Ain.Set64.dst);
1229 return;
1230 case Ain_Bsfr64:
1231 vex_printf("bs%cq ", i->Ain.Bsfr64.isFwds ? 'f' : 'r');
1232 ppHRegAMD64(i->Ain.Bsfr64.src);
1233 vex_printf(",");
1234 ppHRegAMD64(i->Ain.Bsfr64.dst);
1235 return;
1236 case Ain_MFence:
1237 vex_printf("mfence" );
1238 return;
1239 case Ain_ACAS:
1240 vex_printf("lock cmpxchg%c ",
1241 i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w'
1242 : i->Ain.ACAS.sz==4 ? 'l' : 'q' );
1243 vex_printf("{%%rax->%%rbx},");
1244 ppAMD64AMode(i->Ain.ACAS.addr);
1245 return;
1246 case Ain_DACAS:
1247 vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},",
1248 (Int)(2 * i->Ain.DACAS.sz));
1249 ppAMD64AMode(i->Ain.DACAS.addr);
1250 return;
1251 case Ain_A87Free:
1252 vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
1253 break;
1254 case Ain_A87PushPop:
1255 vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ",
1256 i->Ain.A87PushPop.szB == 4 ? 's' : 'l');
1257 ppAMD64AMode(i->Ain.A87PushPop.addr);
1258 break;
1259 case Ain_A87FpOp:
1260 vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op));
1261 break;
1262 case Ain_A87LdCW:
1263 vex_printf("fldcw ");
1264 ppAMD64AMode(i->Ain.A87LdCW.addr);
1265 break;
1266 case Ain_A87StSW:
1267 vex_printf("fstsw ");
1268 ppAMD64AMode(i->Ain.A87StSW.addr);
1269 break;
1270 case Ain_LdMXCSR:
1271 vex_printf("ldmxcsr ");
1272 ppAMD64AMode(i->Ain.LdMXCSR.addr);
1273 break;
1274 case Ain_SseUComIS:
1275 vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d");
1276 ppHRegAMD64(i->Ain.SseUComIS.srcL);
1277 vex_printf(",");
1278 ppHRegAMD64(i->Ain.SseUComIS.srcR);
1279 vex_printf(" ; pushfq ; popq ");
1280 ppHRegAMD64(i->Ain.SseUComIS.dst);
1281 break;
1282 case Ain_SseSI2SF:
1283 vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d");
1284 (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1285 (i->Ain.SseSI2SF.src);
1286 vex_printf(",");
1287 ppHRegAMD64(i->Ain.SseSI2SF.dst);
1288 break;
1289 case Ain_SseSF2SI:
1290 vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d");
1291 ppHRegAMD64(i->Ain.SseSF2SI.src);
1292 vex_printf(",");
1293 (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1294 (i->Ain.SseSF2SI.dst);
1295 break;
1296 case Ain_SseSDSS:
1297 vex_printf(i->Ain.SseSDSS.from64 ? "cvtsd2ss " : "cvtss2sd ");
1298 ppHRegAMD64(i->Ain.SseSDSS.src);
1299 vex_printf(",");
1300 ppHRegAMD64(i->Ain.SseSDSS.dst);
1301 break;
1302 case Ain_SseLdSt:
1303 switch (i->Ain.SseLdSt.sz) {
1304 case 4: vex_printf("movss "); break;
1305 case 8: vex_printf("movsd "); break;
1306 case 16: vex_printf("movups "); break;
1307 default: vassert(0);
1308 }
1309 if (i->Ain.SseLdSt.isLoad) {
1310 ppAMD64AMode(i->Ain.SseLdSt.addr);
1311 vex_printf(",");
1312 ppHRegAMD64(i->Ain.SseLdSt.reg);
1313 } else {
1314 ppHRegAMD64(i->Ain.SseLdSt.reg);
1315 vex_printf(",");
1316 ppAMD64AMode(i->Ain.SseLdSt.addr);
1317 }
1318 return;
1319 case Ain_SseCStore:
1320 vex_printf("if (%%rflags.%s) { ",
1321 showAMD64CondCode(i->Ain.SseCStore.cond));
1322 vex_printf("movups ");
1323 ppHRegAMD64(i->Ain.SseCStore.src);
1324 vex_printf(", ");
1325 ppAMD64AMode(i->Ain.SseCStore.addr);
1326 vex_printf(" }");
1327 return;
1328 case Ain_SseCLoad:
1329 vex_printf("if (%%rflags.%s) { ",
1330 showAMD64CondCode(i->Ain.SseCLoad.cond));
1331 vex_printf("movups ");
1332 ppAMD64AMode(i->Ain.SseCLoad.addr);
1333 vex_printf(", ");
1334 ppHRegAMD64(i->Ain.SseCLoad.dst);
1335 vex_printf(" }");
1336 return;
1337 case Ain_SseLdzLO:
1338 vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? "s" : "d");
1339 ppAMD64AMode(i->Ain.SseLdzLO.addr);
1340 vex_printf(",");
1341 ppHRegAMD64(i->Ain.SseLdzLO.reg);
1342 return;
1343 case Ain_Sse32Fx4:
1344 vex_printf("%sps ", showAMD64SseOp(i->Ain.Sse32Fx4.op));
1345 ppHRegAMD64(i->Ain.Sse32Fx4.src);
1346 vex_printf(",");
1347 ppHRegAMD64(i->Ain.Sse32Fx4.dst);
1348 return;
1349 case Ain_Sse32FLo:
1350 vex_printf("%sss ", showAMD64SseOp(i->Ain.Sse32FLo.op));
1351 ppHRegAMD64(i->Ain.Sse32FLo.src);
1352 vex_printf(",");
1353 ppHRegAMD64(i->Ain.Sse32FLo.dst);
1354 return;
1355 case Ain_Sse64Fx2:
1356 vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op));
1357 ppHRegAMD64(i->Ain.Sse64Fx2.src);
1358 vex_printf(",");
1359 ppHRegAMD64(i->Ain.Sse64Fx2.dst);
1360 return;
1361 case Ain_Sse64FLo:
1362 vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op));
1363 ppHRegAMD64(i->Ain.Sse64FLo.src);
1364 vex_printf(",");
1365 ppHRegAMD64(i->Ain.Sse64FLo.dst);
1366 return;
1367 case Ain_SseReRg:
1368 vex_printf("%s ", showAMD64SseOp(i->Ain.SseReRg.op));
1369 ppHRegAMD64(i->Ain.SseReRg.src);
1370 vex_printf(",");
1371 ppHRegAMD64(i->Ain.SseReRg.dst);
1372 return;
1373 case Ain_SseCMov:
1374 vex_printf("cmov%s ", showAMD64CondCode(i->Ain.SseCMov.cond));
1375 ppHRegAMD64(i->Ain.SseCMov.src);
1376 vex_printf(",");
1377 ppHRegAMD64(i->Ain.SseCMov.dst);
1378 return;
1379 case Ain_SseShuf:
1380 vex_printf("pshufd $0x%x,", (UInt)i->Ain.SseShuf.order);
1381 ppHRegAMD64(i->Ain.SseShuf.src);
1382 vex_printf(",");
1383 ppHRegAMD64(i->Ain.SseShuf.dst);
1384 return;
1385 case Ain_SseShiftN:
1386 vex_printf("%s $%u, ", showAMD64SseOp(i->Ain.SseShiftN.op),
1387 i->Ain.SseShiftN.shiftBits);
1388 ppHRegAMD64(i->Ain.SseShiftN.dst);
1389 return;
1390 case Ain_SseMOVQ:
1391 vex_printf("movq ");
1392 if (i->Ain.SseMOVQ.toXMM) {
1393 ppHRegAMD64(i->Ain.SseMOVQ.gpr);
1394 vex_printf(",");
1395 ppHRegAMD64(i->Ain.SseMOVQ.xmm);
1396 } else {
1397 ppHRegAMD64(i->Ain.SseMOVQ.xmm);
1398 vex_printf(",");
1399 ppHRegAMD64(i->Ain.SseMOVQ.gpr);
1400 };
1401 return;
1402 //uu case Ain_AvxLdSt:
1403 //uu vex_printf("vmovups ");
1404 //uu if (i->Ain.AvxLdSt.isLoad) {
1405 //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
1406 //uu vex_printf(",");
1407 //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
1408 //uu } else {
1409 //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
1410 //uu vex_printf(",");
1411 //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
1412 //uu }
1413 //uu return;
1414 //uu case Ain_AvxReRg:
1415 //uu vex_printf("v%s ", showAMD64SseOp(i->Ain.SseReRg.op));
1416 //uu ppHRegAMD64(i->Ain.AvxReRg.src);
1417 //uu vex_printf(",");
1418 //uu ppHRegAMD64(i->Ain.AvxReRg.dst);
1419 //uu return;
1420 case Ain_EvCheck:
1421 vex_printf("(evCheck) decl ");
1422 ppAMD64AMode(i->Ain.EvCheck.amCounter);
1423 vex_printf("; jns nofail; jmp *");
1424 ppAMD64AMode(i->Ain.EvCheck.amFailAddr);
1425 vex_printf("; nofail:");
1426 return;
1427 case Ain_ProfInc:
1428 vex_printf("(profInc) movabsq $NotKnownYet, %%r11; incq (%%r11)");
1429 return;
1430 default:
1431 vpanic("ppAMD64Instr");
1432 }
1433 }
1434
1435 /* --------- Helpers for register allocation. --------- */
1436
void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
{
1439 Bool unary;
1440 vassert(mode64 == True);
1441 initHRegUsage(u);
1442 switch (i->tag) {
1443 case Ain_Imm64:
1444 addHRegUse(u, HRmWrite, i->Ain.Imm64.dst);
1445 return;
1446 case Ain_Alu64R:
1447 addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src);
1448 if (i->Ain.Alu64R.op == Aalu_MOV) {
1449 addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst);
1450
1451 if (i->Ain.Alu64R.src->tag == Armi_Reg) {
1452 u->isRegRegMove = True;
1453 u->regMoveSrc = i->Ain.Alu64R.src->Armi.Reg.reg;
1454 u->regMoveDst = i->Ain.Alu64R.dst;
1455 }
1456 return;
1457 }
1458 if (i->Ain.Alu64R.op == Aalu_CMP) {
1459 addHRegUse(u, HRmRead, i->Ain.Alu64R.dst);
1460 return;
1461 }
1462 addHRegUse(u, HRmModify, i->Ain.Alu64R.dst);
1463 return;
1464 case Ain_Alu64M:
1465 addRegUsage_AMD64RI(u, i->Ain.Alu64M.src);
1466 addRegUsage_AMD64AMode(u, i->Ain.Alu64M.dst);
1467 return;
1468 case Ain_Sh64:
1469 addHRegUse(u, HRmModify, i->Ain.Sh64.dst);
1470 if (i->Ain.Sh64.src == 0)
1471 addHRegUse(u, HRmRead, hregAMD64_RCX());
1472 return;
1473 case Ain_Test64:
1474 addHRegUse(u, HRmRead, i->Ain.Test64.dst);
1475 return;
1476 case Ain_Unary64:
1477 addHRegUse(u, HRmModify, i->Ain.Unary64.dst);
1478 return;
1479 case Ain_Lea64:
1480 addRegUsage_AMD64AMode(u, i->Ain.Lea64.am);
1481 addHRegUse(u, HRmWrite, i->Ain.Lea64.dst);
1482 return;
1483 case Ain_Alu32R:
1484 vassert(i->Ain.Alu32R.op != Aalu_MOV);
1485 addRegUsage_AMD64RMI(u, i->Ain.Alu32R.src);
1486 if (i->Ain.Alu32R.op == Aalu_CMP) {
1487 addHRegUse(u, HRmRead, i->Ain.Alu32R.dst);
1488 return;
1489 }
1490 addHRegUse(u, HRmModify, i->Ain.Alu32R.dst);
1491 return;
1492 case Ain_MulL:
1493 addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead);
1494 addHRegUse(u, HRmModify, hregAMD64_RAX());
1495 addHRegUse(u, HRmWrite, hregAMD64_RDX());
1496 return;
1497 case Ain_Div:
1498 addRegUsage_AMD64RM(u, i->Ain.Div.src, HRmRead);
1499 addHRegUse(u, HRmModify, hregAMD64_RAX());
1500 addHRegUse(u, HRmModify, hregAMD64_RDX());
1501 return;
1502 case Ain_Push:
1503 addRegUsage_AMD64RMI(u, i->Ain.Push.src);
1504 addHRegUse(u, HRmModify, hregAMD64_RSP());
1505 return;
1506 case Ain_Call:
1507 /* This is a bit subtle. */
1508 /* First off, claim it trashes all the caller-saved regs
1509 which fall within the register allocator's jurisdiction.
1510 These I believe to be: rax rcx rdx rdi rsi r8 r9 r10
1511 and all the xmm registers. */
1512 addHRegUse(u, HRmWrite, hregAMD64_RAX());
1513 addHRegUse(u, HRmWrite, hregAMD64_RCX());
1514 addHRegUse(u, HRmWrite, hregAMD64_RDX());
1515 addHRegUse(u, HRmWrite, hregAMD64_RDI());
1516 addHRegUse(u, HRmWrite, hregAMD64_RSI());
1517 addHRegUse(u, HRmWrite, hregAMD64_R8());
1518 addHRegUse(u, HRmWrite, hregAMD64_R9());
1519 addHRegUse(u, HRmWrite, hregAMD64_R10());
1520 addHRegUse(u, HRmWrite, hregAMD64_XMM0());
1521 addHRegUse(u, HRmWrite, hregAMD64_XMM1());
1522 addHRegUse(u, HRmWrite, hregAMD64_XMM3());
1523 addHRegUse(u, HRmWrite, hregAMD64_XMM4());
1524 addHRegUse(u, HRmWrite, hregAMD64_XMM5());
1525 addHRegUse(u, HRmWrite, hregAMD64_XMM6());
1526 addHRegUse(u, HRmWrite, hregAMD64_XMM7());
1527 addHRegUse(u, HRmWrite, hregAMD64_XMM8());
1528 addHRegUse(u, HRmWrite, hregAMD64_XMM9());
1529 addHRegUse(u, HRmWrite, hregAMD64_XMM10());
1530 addHRegUse(u, HRmWrite, hregAMD64_XMM11());
1531 addHRegUse(u, HRmWrite, hregAMD64_XMM12());
1532
1533 /* Now we have to state any parameter-carrying registers
1534 which might be read. This depends on the regparmness. */
1535 switch (i->Ain.Call.regparms) {
1536 case 6: addHRegUse(u, HRmRead, hregAMD64_R9()); /*fallthru*/
1537 case 5: addHRegUse(u, HRmRead, hregAMD64_R8()); /*fallthru*/
1538 case 4: addHRegUse(u, HRmRead, hregAMD64_RCX()); /*fallthru*/
1539 case 3: addHRegUse(u, HRmRead, hregAMD64_RDX()); /*fallthru*/
1540 case 2: addHRegUse(u, HRmRead, hregAMD64_RSI()); /*fallthru*/
1541 case 1: addHRegUse(u, HRmRead, hregAMD64_RDI()); break;
1542 case 0: break;
1543 default: vpanic("getRegUsage_AMD64Instr:Call:regparms");
1544 }
1545 /* Finally, there is the issue that the insn trashes a
1546 register because the literal target address has to be
1547 loaded into a register. Fortunately, r11 is stated in the
1548 ABI as a scratch register, and so seems a suitable victim. */
1549 addHRegUse(u, HRmWrite, hregAMD64_R11());
1550 /* Upshot of this is that the assembler really must use r11,
1551 and no other, as a destination temporary. */
1552 return;
1553 /* XDirect/XIndir/XAssisted are also a bit subtle. They
1554 conditionally exit the block. Hence we only need to list (1)
1555 the registers that they read, and (2) the registers that they
1556 write in the case where the block is not exited. (2) is
1557 empty, hence only (1) is relevant here. */
1558 case Ain_XDirect:
1559 /* Don't bother to mention the write to %r11, since it is not
1560 available to the allocator. */
1561 addRegUsage_AMD64AMode(u, i->Ain.XDirect.amRIP);
1562 return;
1563 case Ain_XIndir:
1564 /* Ditto re %r11 */
1565 addHRegUse(u, HRmRead, i->Ain.XIndir.dstGA);
1566 addRegUsage_AMD64AMode(u, i->Ain.XIndir.amRIP);
1567 return;
1568 case Ain_XAssisted:
1569 /* Ditto re %r11 and %rbp (the baseblock ptr) */
1570 addHRegUse(u, HRmRead, i->Ain.XAssisted.dstGA);
1571 addRegUsage_AMD64AMode(u, i->Ain.XAssisted.amRIP);
1572 return;
1573 case Ain_CMov64:
1574 addHRegUse(u, HRmRead, i->Ain.CMov64.src);
1575 addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
1576 return;
1577 case Ain_CLoad:
1578 addRegUsage_AMD64AMode(u, i->Ain.CLoad.addr);
1579 addHRegUse(u, HRmModify, i->Ain.CLoad.dst);
1580 return;
1581 case Ain_CStore:
1582 addRegUsage_AMD64AMode(u, i->Ain.CStore.addr);
1583 addHRegUse(u, HRmRead, i->Ain.CStore.src);
1584 return;
1585 case Ain_MovxLQ:
1586 addHRegUse(u, HRmRead, i->Ain.MovxLQ.src);
1587 addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
1588 return;
1589 case Ain_LoadEX:
1590 addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src);
1591 addHRegUse(u, HRmWrite, i->Ain.LoadEX.dst);
1592 return;
1593 case Ain_Store:
1594 addHRegUse(u, HRmRead, i->Ain.Store.src);
1595 addRegUsage_AMD64AMode(u, i->Ain.Store.dst);
1596 return;
1597 case Ain_Set64:
1598 addHRegUse(u, HRmWrite, i->Ain.Set64.dst);
1599 return;
1600 case Ain_Bsfr64:
1601 addHRegUse(u, HRmRead, i->Ain.Bsfr64.src);
1602 addHRegUse(u, HRmWrite, i->Ain.Bsfr64.dst);
1603 return;
1604 case Ain_MFence:
1605 return;
1606 case Ain_ACAS:
1607 addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr);
1608 addHRegUse(u, HRmRead, hregAMD64_RBX());
1609 addHRegUse(u, HRmModify, hregAMD64_RAX());
1610 return;
1611 case Ain_DACAS:
1612 addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr);
1613 addHRegUse(u, HRmRead, hregAMD64_RCX());
1614 addHRegUse(u, HRmRead, hregAMD64_RBX());
1615 addHRegUse(u, HRmModify, hregAMD64_RDX());
1616 addHRegUse(u, HRmModify, hregAMD64_RAX());
1617 return;
1618 case Ain_A87Free:
1619 return;
1620 case Ain_A87PushPop:
1621 addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr);
1622 return;
1623 case Ain_A87FpOp:
1624 return;
1625 case Ain_A87LdCW:
1626 addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr);
1627 return;
1628 case Ain_A87StSW:
1629 addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr);
1630 return;
1631 case Ain_LdMXCSR:
1632 addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr);
1633 return;
1634 case Ain_SseUComIS:
1635 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcL);
1636 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcR);
1637 addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst);
1638 return;
1639 case Ain_SseSI2SF:
1640 addHRegUse(u, HRmRead, i->Ain.SseSI2SF.src);
1641 addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst);
1642 return;
1643 case Ain_SseSF2SI:
1644 addHRegUse(u, HRmRead, i->Ain.SseSF2SI.src);
1645 addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst);
1646 return;
1647 case Ain_SseSDSS:
1648 addHRegUse(u, HRmRead, i->Ain.SseSDSS.src);
1649 addHRegUse(u, HRmWrite, i->Ain.SseSDSS.dst);
1650 return;
1651 case Ain_SseLdSt:
1652 addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr);
1653 addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead,
1654 i->Ain.SseLdSt.reg);
1655 return;
1656 case Ain_SseCStore:
1657 addRegUsage_AMD64AMode(u, i->Ain.SseCStore.addr);
1658 addHRegUse(u, HRmRead, i->Ain.SseCStore.src);
1659 return;
1660 case Ain_SseCLoad:
1661 addRegUsage_AMD64AMode(u, i->Ain.SseCLoad.addr);
1662 addHRegUse(u, HRmModify, i->Ain.SseCLoad.dst);
1663 return;
1664 case Ain_SseLdzLO:
1665 addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr);
1666 addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg);
1667 return;
1668 case Ain_Sse32Fx4:
1669 vassert(i->Ain.Sse32Fx4.op != Asse_MOV);
1670 unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF
1671 || i->Ain.Sse32Fx4.op == Asse_RSQRTF
1672 || i->Ain.Sse32Fx4.op == Asse_SQRTF
1673 || i->Ain.Sse32Fx4.op == Asse_I2F
1674 || i->Ain.Sse32Fx4.op == Asse_F2I );
1675 addHRegUse(u, HRmRead, i->Ain.Sse32Fx4.src);
1676 addHRegUse(u, unary ? HRmWrite : HRmModify,
1677 i->Ain.Sse32Fx4.dst);
1678 return;
1679 case Ain_Sse32FLo:
1680 vassert(i->Ain.Sse32FLo.op != Asse_MOV);
1681 unary = toBool( i->Ain.Sse32FLo.op == Asse_RCPF
1682 || i->Ain.Sse32FLo.op == Asse_RSQRTF
1683 || i->Ain.Sse32FLo.op == Asse_SQRTF );
1684 addHRegUse(u, HRmRead, i->Ain.Sse32FLo.src);
1685 addHRegUse(u, unary ? HRmWrite : HRmModify,
1686 i->Ain.Sse32FLo.dst);
1687 return;
1688 case Ain_Sse64Fx2:
1689 vassert(i->Ain.Sse64Fx2.op != Asse_MOV);
1690 unary = toBool( i->Ain.Sse64Fx2.op == Asse_RCPF
1691 || i->Ain.Sse64Fx2.op == Asse_RSQRTF
1692 || i->Ain.Sse64Fx2.op == Asse_SQRTF );
1693 addHRegUse(u, HRmRead, i->Ain.Sse64Fx2.src);
1694 addHRegUse(u, unary ? HRmWrite : HRmModify,
1695 i->Ain.Sse64Fx2.dst);
1696 return;
1697 case Ain_Sse64FLo:
1698 vassert(i->Ain.Sse64FLo.op != Asse_MOV);
1699 unary = toBool( i->Ain.Sse64FLo.op == Asse_RCPF
1700 || i->Ain.Sse64FLo.op == Asse_RSQRTF
1701 || i->Ain.Sse64FLo.op == Asse_SQRTF );
1702 addHRegUse(u, HRmRead, i->Ain.Sse64FLo.src);
1703 addHRegUse(u, unary ? HRmWrite : HRmModify,
1704 i->Ain.Sse64FLo.dst);
1705 return;
1706 case Ain_SseReRg:
1707 if ( (i->Ain.SseReRg.op == Asse_XOR
1708 || i->Ain.SseReRg.op == Asse_CMPEQ32)
1709 && sameHReg(i->Ain.SseReRg.src, i->Ain.SseReRg.dst)) {
1710 /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd
1711 r,r' as a write of a value to r, and independent of any
1712 previous value in r */
1713 /* (as opposed to a rite of passage :-) */
1714 addHRegUse(u, HRmWrite, i->Ain.SseReRg.dst);
1715 } else {
1716 addHRegUse(u, HRmRead, i->Ain.SseReRg.src);
1717 addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV
1718 ? HRmWrite : HRmModify,
1719 i->Ain.SseReRg.dst);
1720
1721 if (i->Ain.SseReRg.op == Asse_MOV) {
1722 u->isRegRegMove = True;
1723 u->regMoveSrc = i->Ain.SseReRg.src;
1724 u->regMoveDst = i->Ain.SseReRg.dst;
1725 }
1726 }
1727 return;
1728 case Ain_SseCMov:
1729 addHRegUse(u, HRmRead, i->Ain.SseCMov.src);
1730 addHRegUse(u, HRmModify, i->Ain.SseCMov.dst);
1731 return;
1732 case Ain_SseShuf:
1733 addHRegUse(u, HRmRead, i->Ain.SseShuf.src);
1734 addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst);
1735 return;
1736 case Ain_SseShiftN:
1737 addHRegUse(u, HRmModify, i->Ain.SseShiftN.dst);
1738 return;
1739 case Ain_SseMOVQ:
1740 addHRegUse(u, i->Ain.SseMOVQ.toXMM ? HRmRead : HRmWrite,
1741 i->Ain.SseMOVQ.gpr);
1742 addHRegUse(u, i->Ain.SseMOVQ.toXMM ? HRmWrite : HRmRead,
1743 i->Ain.SseMOVQ.xmm);
1744 return;
1745 //uu case Ain_AvxLdSt:
1746 //uu addRegUsage_AMD64AMode(u, i->Ain.AvxLdSt.addr);
1747 //uu addHRegUse(u, i->Ain.AvxLdSt.isLoad ? HRmWrite : HRmRead,
1748 //uu i->Ain.AvxLdSt.reg);
1749 //uu return;
1750 //uu case Ain_AvxReRg:
1751 //uu if ( (i->Ain.AvxReRg.op == Asse_XOR
1752 //uu || i->Ain.AvxReRg.op == Asse_CMPEQ32)
1753 //uu && i->Ain.AvxReRg.src == i->Ain.AvxReRg.dst) {
1754 //uu /* See comments on the case for Ain_SseReRg. */
1755 //uu addHRegUse(u, HRmWrite, i->Ain.AvxReRg.dst);
1756 //uu } else {
1757 //uu addHRegUse(u, HRmRead, i->Ain.AvxReRg.src);
1758 //uu addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV
1759 //uu ? HRmWrite : HRmModify,
1760 //uu i->Ain.AvxReRg.dst);
1761 //uu
1762 //uu if (i->Ain.AvxReRg.op == Asse_MOV) {
1763 //uu u->isRegRegMove = True;
1764 //uu u->regMoveSrc = i->Ain.AvxReRg.src;
1765 //uu u->regMoveDst = i->Ain.AvxReRg.dst;
1766 //uu }
1767 //uu }
1768 //uu return;
1769 case Ain_EvCheck:
1770 /* We expect both amodes only to mention %rbp, so this is in
1771 fact pointless, since %rbp isn't allocatable, but anyway.. */
1772 addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amCounter);
1773 addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amFailAddr);
1774 return;
1775 case Ain_ProfInc:
1776 addHRegUse(u, HRmWrite, hregAMD64_R11());
1777 return;
1778 default:
1779 ppAMD64Instr(i, mode64);
1780 vpanic("getRegUsage_AMD64Instr");
1781 }
1782 }
1783
1784 /* local helper */
1785 static inline void mapReg(HRegRemap* m, HReg* r)
1786 {
1787 *r = lookupHRegRemap(m, *r);
1788 }
1789
1790 void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
1791 {
1792 vassert(mode64 == True);
1793 switch (i->tag) {
1794 case Ain_Imm64:
1795 mapReg(m, &i->Ain.Imm64.dst);
1796 return;
1797 case Ain_Alu64R:
1798 mapRegs_AMD64RMI(m, i->Ain.Alu64R.src);
1799 mapReg(m, &i->Ain.Alu64R.dst);
1800 return;
1801 case Ain_Alu64M:
1802 mapRegs_AMD64RI(m, i->Ain.Alu64M.src);
1803 mapRegs_AMD64AMode(m, i->Ain.Alu64M.dst);
1804 return;
1805 case Ain_Sh64:
1806 mapReg(m, &i->Ain.Sh64.dst);
1807 return;
1808 case Ain_Test64:
1809 mapReg(m, &i->Ain.Test64.dst);
1810 return;
1811 case Ain_Unary64:
1812 mapReg(m, &i->Ain.Unary64.dst);
1813 return;
1814 case Ain_Lea64:
1815 mapRegs_AMD64AMode(m, i->Ain.Lea64.am);
1816 mapReg(m, &i->Ain.Lea64.dst);
1817 return;
1818 case Ain_Alu32R:
1819 mapRegs_AMD64RMI(m, i->Ain.Alu32R.src);
1820 mapReg(m, &i->Ain.Alu32R.dst);
1821 return;
1822 case Ain_MulL:
1823 mapRegs_AMD64RM(m, i->Ain.MulL.src);
1824 return;
1825 case Ain_Div:
1826 mapRegs_AMD64RM(m, i->Ain.Div.src);
1827 return;
1828 case Ain_Push:
1829 mapRegs_AMD64RMI(m, i->Ain.Push.src);
1830 return;
1831 case Ain_Call:
1832 return;
1833 case Ain_XDirect:
1834 mapRegs_AMD64AMode(m, i->Ain.XDirect.amRIP);
1835 return;
1836 case Ain_XIndir:
1837 mapReg(m, &i->Ain.XIndir.dstGA);
1838 mapRegs_AMD64AMode(m, i->Ain.XIndir.amRIP);
1839 return;
1840 case Ain_XAssisted:
1841 mapReg(m, &i->Ain.XAssisted.dstGA);
1842 mapRegs_AMD64AMode(m, i->Ain.XAssisted.amRIP);
1843 return;
1844 case Ain_CMov64:
1845 mapReg(m, &i->Ain.CMov64.src);
1846 mapReg(m, &i->Ain.CMov64.dst);
1847 return;
1848 case Ain_CLoad:
1849 mapRegs_AMD64AMode(m, i->Ain.CLoad.addr);
1850 mapReg(m, &i->Ain.CLoad.dst);
1851 return;
1852 case Ain_CStore:
1853 mapRegs_AMD64AMode(m, i->Ain.CStore.addr);
1854 mapReg(m, &i->Ain.CStore.src);
1855 return;
1856 case Ain_MovxLQ:
1857 mapReg(m, &i->Ain.MovxLQ.src);
1858 mapReg(m, &i->Ain.MovxLQ.dst);
1859 return;
1860 case Ain_LoadEX:
1861 mapRegs_AMD64AMode(m, i->Ain.LoadEX.src);
1862 mapReg(m, &i->Ain.LoadEX.dst);
1863 return;
1864 case Ain_Store:
1865 mapReg(m, &i->Ain.Store.src);
1866 mapRegs_AMD64AMode(m, i->Ain.Store.dst);
1867 return;
1868 case Ain_Set64:
1869 mapReg(m, &i->Ain.Set64.dst);
1870 return;
1871 case Ain_Bsfr64:
1872 mapReg(m, &i->Ain.Bsfr64.src);
1873 mapReg(m, &i->Ain.Bsfr64.dst);
1874 return;
1875 case Ain_MFence:
1876 return;
1877 case Ain_ACAS:
1878 mapRegs_AMD64AMode(m, i->Ain.ACAS.addr);
1879 return;
1880 case Ain_DACAS:
1881 mapRegs_AMD64AMode(m, i->Ain.DACAS.addr);
1882 return;
1883 case Ain_A87Free:
1884 return;
1885 case Ain_A87PushPop:
1886 mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr);
1887 return;
1888 case Ain_A87FpOp:
1889 return;
1890 case Ain_A87LdCW:
1891 mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr);
1892 return;
1893 case Ain_A87StSW:
1894 mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr);
1895 return;
1896 case Ain_LdMXCSR:
1897 mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr);
1898 return;
1899 case Ain_SseUComIS:
1900 mapReg(m, &i->Ain.SseUComIS.srcL);
1901 mapReg(m, &i->Ain.SseUComIS.srcR);
1902 mapReg(m, &i->Ain.SseUComIS.dst);
1903 return;
1904 case Ain_SseSI2SF:
1905 mapReg(m, &i->Ain.SseSI2SF.src);
1906 mapReg(m, &i->Ain.SseSI2SF.dst);
1907 return;
1908 case Ain_SseSF2SI:
1909 mapReg(m, &i->Ain.SseSF2SI.src);
1910 mapReg(m, &i->Ain.SseSF2SI.dst);
1911 return;
1912 case Ain_SseSDSS:
1913 mapReg(m, &i->Ain.SseSDSS.src);
1914 mapReg(m, &i->Ain.SseSDSS.dst);
1915 return;
1916 case Ain_SseLdSt:
1917 mapReg(m, &i->Ain.SseLdSt.reg);
1918 mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr);
1919 break;
1920 case Ain_SseCStore:
1921 mapRegs_AMD64AMode(m, i->Ain.SseCStore.addr);
1922 mapReg(m, &i->Ain.SseCStore.src);
1923 return;
1924 case Ain_SseCLoad:
1925 mapRegs_AMD64AMode(m, i->Ain.SseCLoad.addr);
1926 mapReg(m, &i->Ain.SseCLoad.dst);
1927 return;
1928 case Ain_SseLdzLO:
1929 mapReg(m, &i->Ain.SseLdzLO.reg);
1930 mapRegs_AMD64AMode(m, i->Ain.SseLdzLO.addr);
1931 break;
1932 case Ain_Sse32Fx4:
1933 mapReg(m, &i->Ain.Sse32Fx4.src);
1934 mapReg(m, &i->Ain.Sse32Fx4.dst);
1935 return;
1936 case Ain_Sse32FLo:
1937 mapReg(m, &i->Ain.Sse32FLo.src);
1938 mapReg(m, &i->Ain.Sse32FLo.dst);
1939 return;
1940 case Ain_Sse64Fx2:
1941 mapReg(m, &i->Ain.Sse64Fx2.src);
1942 mapReg(m, &i->Ain.Sse64Fx2.dst);
1943 return;
1944 case Ain_Sse64FLo:
1945 mapReg(m, &i->Ain.Sse64FLo.src);
1946 mapReg(m, &i->Ain.Sse64FLo.dst);
1947 return;
1948 case Ain_SseReRg:
1949 mapReg(m, &i->Ain.SseReRg.src);
1950 mapReg(m, &i->Ain.SseReRg.dst);
1951 return;
1952 case Ain_SseCMov:
1953 mapReg(m, &i->Ain.SseCMov.src);
1954 mapReg(m, &i->Ain.SseCMov.dst);
1955 return;
1956 case Ain_SseShuf:
1957 mapReg(m, &i->Ain.SseShuf.src);
1958 mapReg(m, &i->Ain.SseShuf.dst);
1959 return;
1960 case Ain_SseShiftN:
1961 mapReg(m, &i->Ain.SseShiftN.dst);
1962 return;
1963 case Ain_SseMOVQ:
1964 mapReg(m, &i->Ain.SseMOVQ.gpr);
1965 mapReg(m, &i->Ain.SseMOVQ.xmm);
1966 return;
1967 //uu case Ain_AvxLdSt:
1968 //uu mapReg(m, &i->Ain.AvxLdSt.reg);
1969 //uu mapRegs_AMD64AMode(m, i->Ain.AvxLdSt.addr);
1970 //uu break;
1971 //uu case Ain_AvxReRg:
1972 //uu mapReg(m, &i->Ain.AvxReRg.src);
1973 //uu mapReg(m, &i->Ain.AvxReRg.dst);
1974 //uu return;
1975 case Ain_EvCheck:
1976 /* We expect both amodes only to mention %rbp, so this is in
1977 fact pointless, since %rbp isn't allocatable, but anyway.. */
1978 mapRegs_AMD64AMode(m, i->Ain.EvCheck.amCounter);
1979 mapRegs_AMD64AMode(m, i->Ain.EvCheck.amFailAddr);
1980 return;
1981 case Ain_ProfInc:
1982 /* hardwires r11 -- nothing to modify. */
1983 return;
1984 default:
1985 ppAMD64Instr(i, mode64);
1986 vpanic("mapRegs_AMD64Instr");
1987 }
1988 }
1989
1990 /* Generate amd64 spill/reload instructions under the direction of the
1991 register allocator. Note it's critical these don't write the
1992 condition codes. */
1993
1994 void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1995 HReg rreg, Int offsetB, Bool mode64 )
1996 {
1997 AMD64AMode* am;
1998 vassert(offsetB >= 0);
1999 vassert(!hregIsVirtual(rreg));
2000 vassert(mode64 == True);
2001 *i1 = *i2 = NULL;
2002 am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
2003 switch (hregClass(rreg)) {
2004 case HRcInt64:
2005 *i1 = AMD64Instr_Alu64M ( Aalu_MOV, AMD64RI_Reg(rreg), am );
2006 return;
2007 case HRcVec128:
2008 *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am );
2009 return;
2010 default:
2011 ppHRegClass(hregClass(rreg));
2012 vpanic("genSpill_AMD64: unimplemented regclass");
2013 }
2014 }
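/* Illustrative usage note (added; not in the original source): for an
   integer rreg spilled at offsetB == 24, the instruction generated above
   is effectively "movq %rreg, 24(%rbp)" -- an Alu64M MOV whose destination
   amode is 24(%rbp).  A vector rreg instead gets a 16-byte SseLdSt store
   to the same %rbp-relative slot. */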
2015
2016 void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2017 HReg rreg, Int offsetB, Bool mode64 )
2018 {
2019 AMD64AMode* am;
2020 vassert(offsetB >= 0);
2021 vassert(!hregIsVirtual(rreg));
2022 vassert(mode64 == True);
2023 *i1 = *i2 = NULL;
2024 am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
2025 switch (hregClass(rreg)) {
2026 case HRcInt64:
2027 *i1 = AMD64Instr_Alu64R ( Aalu_MOV, AMD64RMI_Mem(am), rreg );
2028 return;
2029 case HRcVec128:
2030 *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am );
2031 return;
2032 default:
2033 ppHRegClass(hregClass(rreg));
2034 vpanic("genReload_AMD64: unimplemented regclass");
2035 }
2036 }
2037
2038 AMD64Instr* genMove_AMD64(HReg from, HReg to, Bool mode64)
2039 {
2040 switch (hregClass(from)) {
2041 case HRcInt64:
2042 return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(from), to);
2043 case HRcVec128:
2044 return AMD64Instr_SseReRg(Asse_MOV, from, to);
2045 default:
2046 ppHRegClass(hregClass(from));
2047 vpanic("genMove_AMD64: unimplemented regclass");
2048 }
2049 }
2050
2051 AMD64Instr* directReload_AMD64( AMD64Instr* i, HReg vreg, Short spill_off )
2052 {
2053 vassert(spill_off >= 0 && spill_off < 10000); /* let's say */
2054
2055 /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
2056 Convert to: src=RMI_Mem, dst=Reg
2057 */
2058 if (i->tag == Ain_Alu64R
2059 && (i->Ain.Alu64R.op == Aalu_MOV || i->Ain.Alu64R.op == Aalu_OR
2060 || i->Ain.Alu64R.op == Aalu_XOR)
2061 && i->Ain.Alu64R.src->tag == Armi_Reg
2062 && sameHReg(i->Ain.Alu64R.src->Armi.Reg.reg, vreg)) {
2063 vassert(! sameHReg(i->Ain.Alu64R.dst, vreg));
2064 return AMD64Instr_Alu64R(
2065 i->Ain.Alu64R.op,
2066 AMD64RMI_Mem( AMD64AMode_IR( spill_off, hregAMD64_RBP())),
2067 i->Ain.Alu64R.dst
2068 );
2069 }
2070
2071 /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
2072 Convert to: src=RI_Imm, dst=Mem
2073 */
2074 if (i->tag == Ain_Alu64R
2075 && (i->Ain.Alu64R.op == Aalu_CMP)
2076 && i->Ain.Alu64R.src->tag == Armi_Imm
2077 && sameHReg(i->Ain.Alu64R.dst, vreg)) {
2078 return AMD64Instr_Alu64M(
2079 i->Ain.Alu64R.op,
2080 AMD64RI_Imm( i->Ain.Alu64R.src->Armi.Imm.imm32 ),
2081 AMD64AMode_IR( spill_off, hregAMD64_RBP())
2082 );
2083 }
2084
2085 return NULL;
2086 }
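/* Illustrative example (added; not in the original source): if vreg is
   spilled at spill_off == 16, then "cmpq $7, vreg" (an Alu64R CMP with an
   Armi_Imm source and vreg as dst) is rewritten by the second case above
   into "cmpq $7, 16(%rbp)" (Alu64M), so the reload of vreg is folded away
   entirely. */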
2087
2088
2089 /* --------- The amd64 assembler (bleh.) --------- */
2090
2091 /* Produce the low three bits of an integer register number. */
2092 inline static UInt iregEnc210 ( HReg r )
2093 {
2094 UInt n;
2095 vassert(hregClass(r) == HRcInt64);
2096 vassert(!hregIsVirtual(r));
2097 n = hregEncoding(r);
2098 vassert(n <= 15);
2099 return n & 7;
2100 }
2101
2102 /* Produce bit 3 of an integer register number. */
2103 inline static UInt iregEnc3 ( HReg r )
2104 {
2105 UInt n;
2106 vassert(hregClass(r) == HRcInt64);
2107 vassert(!hregIsVirtual(r));
2108 n = hregEncoding(r);
2109 vassert(n <= 15);
2110 return (n >> 3) & 1;
2111 }
2112
2113 /* Produce a complete 4-bit integer register number. */
2114 inline static UInt iregEnc3210 ( HReg r )
2115 {
2116 UInt n;
2117 vassert(hregClass(r) == HRcInt64);
2118 vassert(!hregIsVirtual(r));
2119 n = hregEncoding(r);
2120 vassert(n <= 15);
2121 return n;
2122 }
2123
2124 /* Produce a complete 4-bit integer register number. */
2125 inline static UInt vregEnc3210 ( HReg r )
2126 {
2127 UInt n;
2128 vassert(hregClass(r) == HRcVec128);
2129 vassert(!hregIsVirtual(r));
2130 n = hregEncoding(r);
2131 vassert(n <= 15);
2132 return n;
2133 }
2134
2135 inline static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
2136 {
2137 vassert(mod < 4);
2138 vassert((reg|regmem) < 8);
2139 return (UChar)( ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7) );
2140 }
2141
2142 inline static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
2143 {
2144 vassert(shift < 4);
2145 vassert((regindex|regbase) < 8);
2146 return (UChar)( ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7) );
2147 }
2148
2149 static UChar* emit32 ( UChar* p, UInt w32 )
2150 {
2151 *p++ = toUChar((w32) & 0x000000FF);
2152 *p++ = toUChar((w32 >> 8) & 0x000000FF);
2153 *p++ = toUChar((w32 >> 16) & 0x000000FF);
2154 *p++ = toUChar((w32 >> 24) & 0x000000FF);
2155 return p;
2156 }
2157
2158 static UChar* emit64 ( UChar* p, ULong w64 )
2159 {
2160 p = emit32(p, toUInt(w64 & 0xFFFFFFFF));
2161 p = emit32(p, toUInt((w64 >> 32) & 0xFFFFFFFF));
2162 return p;
2163 }
2164
2165 /* Does a sign-extend of the lowest 8 bits give
2166 the original number? */
2167 static Bool fits8bits ( UInt w32 )
2168 {
2169 Int i32 = (Int)w32;
2170 return toBool(i32 == ((Int)(w32 << 24) >> 24));
2171 }
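/* For example (illustrative, added): fits8bits(0x7F) and
   fits8bits(0xFFFFFF80) are True (127 and -128 survive an 8-bit
   sign-extend), whereas fits8bits(0x80) is False (128 does not). */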
2172 /* Can the lower 32 bits be signedly widened to produce the whole
2173 64-bit value? In other words, are the top 33 bits either all 0 or
2174 all 1 ? */
2175 static Bool fitsIn32Bits ( ULong x )
2176 {
2177 Long y1;
2178 y1 = x << 32;
2179 y1 >>=/*s*/ 32;
2180 return toBool(x == y1);
2181 }
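/* For example (illustrative, added): fitsIn32Bits(0xFFFFFFFF80000000ULL)
   is True (the top 33 bits are all 1), but fitsIn32Bits(0x80000000ULL)
   is False (bit 31 is set while the upper 32 bits are zero). */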
2182
2183
2184 /* Forming mod-reg-rm bytes and scale-index-base bytes.
2185
2186 greg, 0(ereg) | ereg is not any of: RSP RBP R12 R13
2187 = 00 greg ereg
2188
2189 greg, d8(ereg) | ereg is neither of: RSP R12
2190 = 01 greg ereg, d8
2191
2192 greg, d32(ereg) | ereg is neither of: RSP R12
2193 = 10 greg ereg, d32
2194
2195 greg, d8(ereg) | ereg is either: RSP R12
2196 = 01 greg 100, 0x24, d8
2197 (lowest bit of rex distinguishes R12/RSP)
2198
2199 greg, d32(ereg) | ereg is either: RSP R12
2200 = 10 greg 100, 0x24, d32
2201 (lowest bit of rex distinguishes R12/RSP)
2202
2203 -----------------------------------------------
2204
2205 greg, d8(base,index,scale)
2206 | index != RSP
2207 = 01 greg 100, scale index base, d8
2208
2209 greg, d32(base,index,scale)
2210 | index != RSP
2211 = 10 greg 100, scale index base, d32
2212 */
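/* Worked example (added for illustration): encoding the memory operand of
   "movq %rax, 8(%rsp)" falls into the "d8(ereg) where ereg is RSP/R12"
   rule above: modrm = 01 000 100 = 0x44, then the fixed SIB byte 0x24,
   then disp8 0x08.  With the REX.W prefix 0x48 and opcode 0x89 the whole
   instruction is 48 89 44 24 08. */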
2213 static UChar* doAMode_M__wrk ( UChar* p, UInt gregEnc3210, AMD64AMode* am )
2214 {
2215 UInt gregEnc210 = gregEnc3210 & 7;
2216 if (am->tag == Aam_IR) {
2217 if (am->Aam.IR.imm == 0
2218 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2219 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RBP())
2220 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2221 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R13())
2222 ) {
2223 *p++ = mkModRegRM(0, gregEnc210, iregEnc210(am->Aam.IR.reg));
2224 return p;
2225 }
2226 if (fits8bits(am->Aam.IR.imm)
2227 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2228 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2229 ) {
2230 *p++ = mkModRegRM(1, gregEnc210, iregEnc210(am->Aam.IR.reg));
2231 *p++ = toUChar(am->Aam.IR.imm & 0xFF);
2232 return p;
2233 }
2234 if (! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2235 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2236 ) {
2237 *p++ = mkModRegRM(2, gregEnc210, iregEnc210(am->Aam.IR.reg));
2238 p = emit32(p, am->Aam.IR.imm);
2239 return p;
2240 }
2241 if ((sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2242 || sameHReg(am->Aam.IR.reg, hregAMD64_R12()))
2243 && fits8bits(am->Aam.IR.imm)) {
2244 *p++ = mkModRegRM(1, gregEnc210, 4);
2245 *p++ = 0x24;
2246 *p++ = toUChar(am->Aam.IR.imm & 0xFF);
2247 return p;
2248 }
2249 if (/* (sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2250 || wait for test case for RSP case */
2251 sameHReg(am->Aam.IR.reg, hregAMD64_R12())) {
2252 *p++ = mkModRegRM(2, gregEnc210, 4);
2253 *p++ = 0x24;
2254 p = emit32(p, am->Aam.IR.imm);
2255 return p;
2256 }
2257 ppAMD64AMode(am);
2258 vpanic("doAMode_M: can't emit amode IR");
2259 /*NOTREACHED*/
2260 }
2261 if (am->tag == Aam_IRRS) {
2262 if (fits8bits(am->Aam.IRRS.imm)
2263 && ! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
2264 *p++ = mkModRegRM(1, gregEnc210, 4);
2265 *p++ = mkSIB(am->Aam.IRRS.shift, iregEnc210(am->Aam.IRRS.index),
2266 iregEnc210(am->Aam.IRRS.base));
2267 *p++ = toUChar(am->Aam.IRRS.imm & 0xFF);
2268 return p;
2269 }
2270 if (! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
2271 *p++ = mkModRegRM(2, gregEnc210, 4);
2272 *p++ = mkSIB(am->Aam.IRRS.shift, iregEnc210(am->Aam.IRRS.index),
2273 iregEnc210(am->Aam.IRRS.base));
2274 p = emit32(p, am->Aam.IRRS.imm);
2275 return p;
2276 }
2277 ppAMD64AMode(am);
2278 vpanic("doAMode_M: can't emit amode IRRS");
2279 /*NOTREACHED*/
2280 }
2281 vpanic("doAMode_M: unknown amode");
2282 /*NOTREACHED*/
2283 }
2284
2285 static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am )
2286 {
2287 return doAMode_M__wrk(p, iregEnc3210(greg), am);
2288 }
2289
2290 static UChar* doAMode_M_enc ( UChar* p, UInt gregEnc3210, AMD64AMode* am )
2291 {
2292 vassert(gregEnc3210 < 16);
2293 return doAMode_M__wrk(p, gregEnc3210, am);
2294 }
2295
2296
2297 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
2298 inline
2299 static UChar* doAMode_R__wrk ( UChar* p, UInt gregEnc3210, UInt eregEnc3210 )
2300 {
2301 *p++ = mkModRegRM(3, gregEnc3210 & 7, eregEnc3210 & 7);
2302 return p;
2303 }
2304
2305 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
2306 {
2307 return doAMode_R__wrk(p, iregEnc3210(greg), iregEnc3210(ereg));
2308 }
2309
2310 static UChar* doAMode_R_enc_reg ( UChar* p, UInt gregEnc3210, HReg ereg )
2311 {
2312 vassert(gregEnc3210 < 16);
2313 return doAMode_R__wrk(p, gregEnc3210, iregEnc3210(ereg));
2314 }
2315
2316 static UChar* doAMode_R_reg_enc ( UChar* p, HReg greg, UInt eregEnc3210 )
2317 {
2318 vassert(eregEnc3210 < 16);
2319 return doAMode_R__wrk(p, iregEnc3210(greg), eregEnc3210);
2320 }
2321
2322 static UChar* doAMode_R_enc_enc ( UChar* p, UInt gregEnc3210, UInt eregEnc3210 )
2323 {
2324 vassert( (gregEnc3210|eregEnc3210) < 16);
2325 return doAMode_R__wrk(p, gregEnc3210, eregEnc3210);
2326 }
2327
2328
2329 /* Clear the W bit on a REX byte, thereby changing the operand size
2330 back to whatever that instruction's default operand size is. */
2331 static inline UChar clearWBit ( UChar rex )
2332 {
2333 return rex & ~(1<<3);
2334 }
2335
2336 static inline UChar setWBit ( UChar rex )
2337 {
2338 return rex | (1<<3);
2339 }
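/* Note (added for illustration): a REX prefix has the form 0100WRXB.
   W selects 64-bit operand size; R, X and B extend the modrm.reg,
   SIB.index and base/rm fields respectively.  So clearWBit(0x48) == 0x40,
   turning a 64-bit form back into the instruction's default (usually
   32-bit) operand size. */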
2340
2341
2342 /* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */
2343 inline static UChar rexAMode_M__wrk ( UInt gregEnc3210, AMD64AMode* am )
2344 {
2345 if (am->tag == Aam_IR) {
2346 UChar W = 1; /* we want 64-bit mode */
2347 UChar R = (gregEnc3210 >> 3) & 1;
2348 UChar X = 0; /* not relevant */
2349 UChar B = iregEnc3(am->Aam.IR.reg);
2350 return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
2351 }
2352 if (am->tag == Aam_IRRS) {
2353 UChar W = 1; /* we want 64-bit mode */
2354 UChar R = (gregEnc3210 >> 3) & 1;
2355 UChar X = iregEnc3(am->Aam.IRRS.index);
2356 UChar B = iregEnc3(am->Aam.IRRS.base);
2357 return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
2358 }
2359 vassert(0);
2360 return 0; /*NOTREACHED*/
2361 }
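/* Illustrative example (added): for greg = %r8 (encoding 8) and an Aam_IR
   amode based on %rax, the byte built above is
   0x40 + (W=1)<<3 + (R=1)<<2 = 0x4C, i.e. a REX.W + REX.R prefix. */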
2362
2363 static UChar rexAMode_M ( HReg greg, AMD64AMode* am )
2364 {
2365 return rexAMode_M__wrk(iregEnc3210(greg), am);
2366 }
2367
2368 static UChar rexAMode_M_enc ( UInt gregEnc3210, AMD64AMode* am )
2369 {
2370 vassert(gregEnc3210 < 16);
2371 return rexAMode_M__wrk(gregEnc3210, am);
2372 }
2373
2374
2375 /* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */
2376 inline static UChar rexAMode_R__wrk ( UInt gregEnc3210, UInt eregEnc3210 )
2377 {
2378 UChar W = 1; /* we want 64-bit mode */
2379 UChar R = (gregEnc3210 >> 3) & 1;
2380 UChar X = 0; /* not relevant */
2381 UChar B = (eregEnc3210 >> 3) & 1;
2382 return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
2383 }
2384
2385 static UChar rexAMode_R ( HReg greg, HReg ereg )
2386 {
2387 return rexAMode_R__wrk(iregEnc3210(greg), iregEnc3210(ereg));
2388 }
2389
2390 static UChar rexAMode_R_enc_reg ( UInt gregEnc3210, HReg ereg )
2391 {
2392 vassert(gregEnc3210 < 16);
2393 return rexAMode_R__wrk(gregEnc3210, iregEnc3210(ereg));
2394 }
2395
2396 static UChar rexAMode_R_reg_enc ( HReg greg, UInt eregEnc3210 )
2397 {
2398 vassert(eregEnc3210 < 16);
2399 return rexAMode_R__wrk(iregEnc3210(greg), eregEnc3210);
2400 }
2401
2402 static UChar rexAMode_R_enc_enc ( UInt gregEnc3210, UInt eregEnc3210 )
2403 {
2404 vassert((gregEnc3210|eregEnc3210) < 16);
2405 return rexAMode_R__wrk(gregEnc3210, eregEnc3210);
2406 }
2407
2408
2409 //uu /* May 2012: this VEX prefix stuff is currently unused, but has
2410 //uu been verified correct (I reckon). Certainly it has been known to
2411 //uu produce correct VEX prefixes during testing. */
2412 //uu
2413 //uu /* Assemble a 2 or 3 byte VEX prefix from parts. rexR, rexX, rexB and
2414 //uu notVvvvv need to be not-ed before packing. mmmmm, rexW, L and pp go
2415 //uu in verbatim. There's no range checking on the bits. */
2416 //uu static UInt packVexPrefix ( UInt rexR, UInt rexX, UInt rexB,
2417 //uu UInt mmmmm, UInt rexW, UInt notVvvv,
2418 //uu UInt L, UInt pp )
2419 //uu {
2420 //uu UChar byte0 = 0;
2421 //uu UChar byte1 = 0;
2422 //uu UChar byte2 = 0;
2423 //uu if (rexX == 0 && rexB == 0 && mmmmm == 1 && rexW == 0) {
2424 //uu /* 2 byte encoding is possible. */
2425 //uu byte0 = 0xC5;
2426 //uu byte1 = ((rexR ^ 1) << 7) | ((notVvvv ^ 0xF) << 3)
2427 //uu | (L << 2) | pp;
2428 //uu } else {
2429 //uu /* 3 byte encoding is needed. */
2430 //uu byte0 = 0xC4;
2431 //uu byte1 = ((rexR ^ 1) << 7) | ((rexX ^ 1) << 6)
2432 //uu | ((rexB ^ 1) << 5) | mmmmm;
2433 //uu byte2 = (rexW << 7) | ((notVvvv ^ 0xF) << 3) | (L << 2) | pp;
2434 //uu }
2435 //uu return (((UInt)byte2) << 16) | (((UInt)byte1) << 8) | ((UInt)byte0);
2436 //uu }
2437 //uu
2438 //uu /* Make up a VEX prefix for a (greg,amode) pair. First byte in bits
2439 //uu 7:0 of result, second in 15:8, third (for a 3 byte prefix) in
2440 //uu 23:16. Has m-mmmm set to indicate a prefix of 0F, pp set to
2441 //uu indicate no SIMD prefix, W=0 (ignore), L=1 (size=256), and
2442 //uu vvvv=1111 (unused 3rd reg). */
2443 //uu static UInt vexAMode_M ( HReg greg, AMD64AMode* am )
2444 //uu {
2445 //uu UChar L = 1; /* size = 256 */
2446 //uu UChar pp = 0; /* no SIMD prefix */
2447 //uu UChar mmmmm = 1; /* 0F */
2448 //uu UChar notVvvv = 0; /* unused */
2449 //uu UChar rexW = 0;
2450 //uu UChar rexR = 0;
2451 //uu UChar rexX = 0;
2452 //uu UChar rexB = 0;
2453 //uu /* Same logic as in rexAMode_M. */
2454 //uu if (am->tag == Aam_IR) {
2455 //uu rexR = iregEnc3(greg);
2456 //uu rexX = 0; /* not relevant */
2457 //uu rexB = iregEnc3(am->Aam.IR.reg);
2458 //uu }
2459 //uu else if (am->tag == Aam_IRRS) {
2460 //uu rexR = iregEnc3(greg);
2461 //uu rexX = iregEnc3(am->Aam.IRRS.index);
2462 //uu rexB = iregEnc3(am->Aam.IRRS.base);
2463 //uu } else {
2464 //uu vassert(0);
2465 //uu }
2466 //uu return packVexPrefix( rexR, rexX, rexB, mmmmm, rexW, notVvvv, L, pp );
2467 //uu }
2468 //uu
2469 //uu static UChar* emitVexPrefix ( UChar* p, UInt vex )
2470 //uu {
2471 //uu switch (vex & 0xFF) {
2472 //uu case 0xC5:
2473 //uu *p++ = 0xC5;
2474 //uu *p++ = (vex >> 8) & 0xFF;
2475 //uu vassert(0 == (vex >> 16));
2476 //uu break;
2477 //uu case 0xC4:
2478 //uu *p++ = 0xC4;
2479 //uu *p++ = (vex >> 8) & 0xFF;
2480 //uu *p++ = (vex >> 16) & 0xFF;
2481 //uu vassert(0 == (vex >> 24));
2482 //uu break;
2483 //uu default:
2484 //uu vassert(0);
2485 //uu }
2486 //uu return p;
2487 //uu }
2488
2489
2490 /* Emit ffree %st(N) */
2491 static UChar* do_ffree_st ( UChar* p, Int n )
2492 {
2493 vassert(n >= 0 && n <= 7);
2494 *p++ = 0xDD;
2495 *p++ = toUChar(0xC0 + n);
2496 return p;
2497 }
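/* For example (illustrative, added): do_ffree_st(p, 3) emits DD C3,
   which is "ffree %st(3)". */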
2498
2499 /* Emit an instruction into buf and return the number of bytes used.
2500 Note that buf is not the insn's final place, and therefore it is
2501 imperative to emit position-independent code. If the emitted
2502 instruction was a profiler inc, set *is_profInc to True, else
2503 leave it unchanged. */
2504
2505 Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
2506 UChar* buf, Int nbuf, const AMD64Instr* i,
2507 Bool mode64, VexEndness endness_host,
2508 const void* disp_cp_chain_me_to_slowEP,
2509 const void* disp_cp_chain_me_to_fastEP,
2510 const void* disp_cp_xindir,
2511 const void* disp_cp_xassisted )
2512 {
2513 UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
2514 UInt xtra;
2515 UInt reg;
2516 UChar rex;
2517 UChar* p = &buf[0];
2518 UChar* ptmp;
2519 Int j;
2520 vassert(nbuf >= 64);
2521 vassert(mode64 == True);
2522
2523 /* vex_printf("asm "); ppAMD64Instr(i, mode64); vex_printf("\n"); */
2524
2525 switch (i->tag) {
2526
2527 case Ain_Imm64:
2528 if (i->Ain.Imm64.imm64 <= 0xFFFFFULL) {
2529 /* Use the short form (load into 32 bit reg, + default
2530 widening rule) for constants under 1 million. We could
2531 use this form for the range 0 to 0x7FFFFFFF inclusive, but
2532 limit it to a smaller range for verifiability purposes. */
2533 if (1 & iregEnc3(i->Ain.Imm64.dst))
2534 *p++ = 0x41;
2535 *p++ = 0xB8 + iregEnc210(i->Ain.Imm64.dst);
2536 p = emit32(p, (UInt)i->Ain.Imm64.imm64);
2537 } else {
2538 *p++ = toUChar(0x48 + (1 & iregEnc3(i->Ain.Imm64.dst)));
2539 *p++ = toUChar(0xB8 + iregEnc210(i->Ain.Imm64.dst));
2540 p = emit64(p, i->Ain.Imm64.imm64);
2541 }
2542 goto done;
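/* Illustrative encodings (added): Imm64 $0x1234 -> %rcx uses the short
   form and emits B9 34 12 00 00 ("movl $0x1234, %ecx"; the 32-bit
   write zeroes bits 63:32).  A constant that doesn't qualify, e.g.
   $0x123456789A -> %rcx, emits the full movabsq form
   48 B9 9A 78 56 34 12 00 00 00. */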
2543
2544 case Ain_Alu64R:
2545 /* Deal specially with MOV */
2546 if (i->Ain.Alu64R.op == Aalu_MOV) {
2547 switch (i->Ain.Alu64R.src->tag) {
2548 case Armi_Imm:
2549 if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFFFF)) {
2550 /* Actually we could use this form for constants in
2551 the range 0 through 0x7FFFFFFF inclusive, but
2552 limit it to a small range for verifiability
2553 purposes. */
2554 /* Generate "movl $imm32, 32-bit-register" and let
2555 the default zero-extend rule cause the upper half
2556 of the dst to be zeroed out too. This saves 1
2557 and sometimes 2 bytes compared to the more
2558 obvious encoding in the 'else' branch. */
2559 if (1 & iregEnc3(i->Ain.Alu64R.dst))
2560 *p++ = 0x41;
2561 *p++ = 0xB8 + iregEnc210(i->Ain.Alu64R.dst);
2562 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2563 } else {
2564 *p++ = toUChar(0x48 + (1 & iregEnc3(i->Ain.Alu64R.dst)));
2565 *p++ = 0xC7;
2566 *p++ = toUChar(0xC0 + iregEnc210(i->Ain.Alu64R.dst));
2567 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2568 }
2569 goto done;
2570 case Armi_Reg:
2571 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2572 i->Ain.Alu64R.dst );
2573 *p++ = 0x89;
2574 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2575 i->Ain.Alu64R.dst);
2576 goto done;
2577 case Armi_Mem:
2578 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
2579 i->Ain.Alu64R.src->Armi.Mem.am);
2580 *p++ = 0x8B;
2581 p = doAMode_M(p, i->Ain.Alu64R.dst,
2582 i->Ain.Alu64R.src->Armi.Mem.am);
2583 goto done;
2584 default:
2585 goto bad;
2586 }
2587 }
2588 /* MUL */
2589 if (i->Ain.Alu64R.op == Aalu_MUL) {
2590 switch (i->Ain.Alu64R.src->tag) {
2591 case Armi_Reg:
2592 *p++ = rexAMode_R( i->Ain.Alu64R.dst,
2593 i->Ain.Alu64R.src->Armi.Reg.reg);
2594 *p++ = 0x0F;
2595 *p++ = 0xAF;
2596 p = doAMode_R(p, i->Ain.Alu64R.dst,
2597 i->Ain.Alu64R.src->Armi.Reg.reg);
2598 goto done;
2599 case Armi_Mem:
2600 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
2601 i->Ain.Alu64R.src->Armi.Mem.am);
2602 *p++ = 0x0F;
2603 *p++ = 0xAF;
2604 p = doAMode_M(p, i->Ain.Alu64R.dst,
2605 i->Ain.Alu64R.src->Armi.Mem.am);
2606 goto done;
2607 case Armi_Imm:
2608 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2609 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2610 *p++ = 0x6B;
2611 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2612 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
2613 } else {
2614 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2615 *p++ = 0x69;
2616 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2617 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2618 }
2619 goto done;
2620 default:
2621 goto bad;
2622 }
2623 }
2624 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
2625 opc = opc_rr = subopc_imm = opc_imma = 0;
2626 switch (i->Ain.Alu64R.op) {
2627 case Aalu_ADC: opc = 0x13; opc_rr = 0x11;
2628 subopc_imm = 2; opc_imma = 0x15; break;
2629 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2630 subopc_imm = 0; opc_imma = 0x05; break;
2631 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2632 subopc_imm = 5; opc_imma = 0x2D; break;
2633 case Aalu_SBB: opc = 0x1B; opc_rr = 0x19;
2634 subopc_imm = 3; opc_imma = 0x1D; break;
2635 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2636 subopc_imm = 4; opc_imma = 0x25; break;
2637 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2638 subopc_imm = 6; opc_imma = 0x35; break;
2639 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2640 subopc_imm = 1; opc_imma = 0x0D; break;
2641 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2642 subopc_imm = 7; opc_imma = 0x3D; break;
2643 default: goto bad;
2644 }
2645 switch (i->Ain.Alu64R.src->tag) {
2646 case Armi_Imm:
2647 if (sameHReg(i->Ain.Alu64R.dst, hregAMD64_RAX())
2648 && !fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2649 goto bad; /* FIXME: awaiting test case */
2650 *p++ = toUChar(opc_imma);
2651 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2652 } else
2653 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2654 *p++ = rexAMode_R_enc_reg( 0, i->Ain.Alu64R.dst );
2655 *p++ = 0x83;
2656 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu64R.dst);
2657 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
2658 } else {
2659 *p++ = rexAMode_R_enc_reg( 0, i->Ain.Alu64R.dst);
2660 *p++ = 0x81;
2661 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu64R.dst);
2662 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2663 }
2664 goto done;
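/* Illustrative encoding (added): with an 8-bit-representable
   immediate, e.g. "addq $1, %rbx", the path above emits
   48 83 C3 01 (REX.W, opcode 0x83, modrm 0xC3 with /0 = ADD,
   imm8 0x01). */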
2665 case Armi_Reg:
2666 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2667 i->Ain.Alu64R.dst);
2668 *p++ = toUChar(opc_rr);
2669 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2670 i->Ain.Alu64R.dst);
2671 goto done;
2672 case Armi_Mem:
2673 *p++ = rexAMode_M( i->Ain.Alu64R.dst,
2674 i->Ain.Alu64R.src->Armi.Mem.am);
2675 *p++ = toUChar(opc);
2676 p = doAMode_M(p, i->Ain.Alu64R.dst,
2677 i->Ain.Alu64R.src->Armi.Mem.am);
2678 goto done;
2679 default:
2680 goto bad;
2681 }
2682 break;
2683
2684 case Ain_Alu64M:
2685 /* Deal specially with MOV */
2686 if (i->Ain.Alu64M.op == Aalu_MOV) {
2687 switch (i->Ain.Alu64M.src->tag) {
2688 case Ari_Reg:
2689 *p++ = rexAMode_M(i->Ain.Alu64M.src->Ari.Reg.reg,
2690 i->Ain.Alu64M.dst);
2691 *p++ = 0x89;
2692 p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg,
2693 i->Ain.Alu64M.dst);
2694 goto done;
2695 case Ari_Imm:
2696 *p++ = rexAMode_M_enc(0, i->Ain.Alu64M.dst);
2697 *p++ = 0xC7;
2698 p = doAMode_M_enc(p, 0, i->Ain.Alu64M.dst);
2699 p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
2700 goto done;
2701 default:
2702 goto bad;
2703 }
2704 }
2705 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not
2706 allowed here. (This is derived from the x86 version of same). */
2707 opc = subopc_imm = opc_imma = 0;
2708 switch (i->Ain.Alu64M.op) {
2709 case Aalu_CMP: opc = 0x39; subopc_imm = 7; break;
2710 default: goto bad;
2711 }
2712 switch (i->Ain.Alu64M.src->tag) {
2713 /*
2714 case Xri_Reg:
2715 *p++ = toUChar(opc);
2716 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
2717 i->Xin.Alu32M.dst);
2718 goto done;
2719 */
2720 case Ari_Imm:
2721 if (fits8bits(i->Ain.Alu64M.src->Ari.Imm.imm32)) {
2722 *p++ = rexAMode_M_enc(subopc_imm, i->Ain.Alu64M.dst);
2723 *p++ = 0x83;
2724 p = doAMode_M_enc(p, subopc_imm, i->Ain.Alu64M.dst);
2725 *p++ = toUChar(0xFF & i->Ain.Alu64M.src->Ari.Imm.imm32);
2726 goto done;
2727 } else {
2728 *p++ = rexAMode_M_enc(subopc_imm, i->Ain.Alu64M.dst);
2729 *p++ = 0x81;
2730 p = doAMode_M_enc(p, subopc_imm, i->Ain.Alu64M.dst);
2731 p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
2732 goto done;
2733 }
2734 default:
2735 goto bad;
2736 }
2737
2738 break;
2739
2740 case Ain_Sh64:
2741 opc_cl = opc_imm = subopc = 0;
2742 switch (i->Ain.Sh64.op) {
2743 case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
2744 case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
2745 case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
2746 default: goto bad;
2747 }
2748 if (i->Ain.Sh64.src == 0) {
2749 *p++ = rexAMode_R_enc_reg(0, i->Ain.Sh64.dst);
2750 *p++ = toUChar(opc_cl);
2751 p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh64.dst);
2752 goto done;
2753 } else {
2754 *p++ = rexAMode_R_enc_reg(0, i->Ain.Sh64.dst);
2755 *p++ = toUChar(opc_imm);
2756 p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh64.dst);
2757 *p++ = (UChar)(i->Ain.Sh64.src);
2758 goto done;
2759 }
2760 break;
2761
2762 case Ain_Test64:
2763 /* testq sign-extend($imm32), %reg */
2764 *p++ = rexAMode_R_enc_reg(0, i->Ain.Test64.dst);
2765 *p++ = 0xF7;
2766 p = doAMode_R_enc_reg(p, 0, i->Ain.Test64.dst);
2767 p = emit32(p, i->Ain.Test64.imm32);
2768 goto done;
2769
2770 case Ain_Unary64:
2771 if (i->Ain.Unary64.op == Aun_NOT) {
2772 *p++ = rexAMode_R_enc_reg(0, i->Ain.Unary64.dst);
2773 *p++ = 0xF7;
2774 p = doAMode_R_enc_reg(p, 2, i->Ain.Unary64.dst);
2775 goto done;
2776 }
2777 if (i->Ain.Unary64.op == Aun_NEG) {
2778 *p++ = rexAMode_R_enc_reg(0, i->Ain.Unary64.dst);
2779 *p++ = 0xF7;
2780 p = doAMode_R_enc_reg(p, 3, i->Ain.Unary64.dst);
2781 goto done;
2782 }
2783 break;
2784
2785 case Ain_Lea64:
2786 *p++ = rexAMode_M(i->Ain.Lea64.dst, i->Ain.Lea64.am);
2787 *p++ = 0x8D;
2788 p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am);
2789 goto done;
2790
2791 case Ain_Alu32R:
2792 /* ADD/SUB/AND/OR/XOR/CMP */
2793 opc = opc_rr = subopc_imm = opc_imma = 0;
2794 switch (i->Ain.Alu32R.op) {
2795 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2796 subopc_imm = 0; opc_imma = 0x05; break;
2797 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2798 subopc_imm = 5; opc_imma = 0x2D; break;
2799 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2800 subopc_imm = 4; opc_imma = 0x25; break;
2801 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2802 subopc_imm = 6; opc_imma = 0x35; break;
2803 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2804 subopc_imm = 1; opc_imma = 0x0D; break;
2805 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2806 subopc_imm = 7; opc_imma = 0x3D; break;
2807 default: goto bad;
2808 }
2809 switch (i->Ain.Alu32R.src->tag) {
2810 case Armi_Imm:
2811 if (sameHReg(i->Ain.Alu32R.dst, hregAMD64_RAX())
2812 && !fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
2813 goto bad; /* FIXME: awaiting test case */
2814 *p++ = toUChar(opc_imma);
2815 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2816 } else
2817 if (fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
2818 rex = clearWBit( rexAMode_R_enc_reg( 0, i->Ain.Alu32R.dst ) );
2819 if (rex != 0x40) *p++ = rex;
2820 *p++ = 0x83;
2821 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu32R.dst);
2822 *p++ = toUChar(0xFF & i->Ain.Alu32R.src->Armi.Imm.imm32);
2823 } else {
2824 rex = clearWBit( rexAMode_R_enc_reg( 0, i->Ain.Alu32R.dst) );
2825 if (rex != 0x40) *p++ = rex;
2826 *p++ = 0x81;
2827 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu32R.dst);
2828 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2829 }
2830 goto done;
2831 case Armi_Reg:
2832 rex = clearWBit(
2833 rexAMode_R( i->Ain.Alu32R.src->Armi.Reg.reg,
2834 i->Ain.Alu32R.dst) );
2835 if (rex != 0x40) *p++ = rex;
2836 *p++ = toUChar(opc_rr);
2837 p = doAMode_R(p, i->Ain.Alu32R.src->Armi.Reg.reg,
2838 i->Ain.Alu32R.dst);
2839 goto done;
2840 case Armi_Mem:
2841 rex = clearWBit(
2842 rexAMode_M( i->Ain.Alu32R.dst,
2843 i->Ain.Alu32R.src->Armi.Mem.am) );
2844 if (rex != 0x40) *p++ = rex;
2845 *p++ = toUChar(opc);
2846 p = doAMode_M(p, i->Ain.Alu32R.dst,
2847 i->Ain.Alu32R.src->Armi.Mem.am);
2848 goto done;
2849 default:
2850 goto bad;
2851 }
2852 break;
2853
2854 case Ain_MulL:
2855 subopc = i->Ain.MulL.syned ? 5 : 4;
2856 switch (i->Ain.MulL.src->tag) {
2857 case Arm_Mem:
2858 *p++ = rexAMode_M_enc(0, i->Ain.MulL.src->Arm.Mem.am);
2859 *p++ = 0xF7;
2860 p = doAMode_M_enc(p, subopc, i->Ain.MulL.src->Arm.Mem.am);
2861 goto done;
2862 case Arm_Reg:
2863 *p++ = rexAMode_R_enc_reg(0, i->Ain.MulL.src->Arm.Reg.reg);
2864 *p++ = 0xF7;
2865 p = doAMode_R_enc_reg(p, subopc, i->Ain.MulL.src->Arm.Reg.reg);
2866 goto done;
2867 default:
2868 goto bad;
2869 }
2870 break;
2871
2872 case Ain_Div:
2873 subopc = i->Ain.Div.syned ? 7 : 6;
2874 if (i->Ain.Div.sz == 4) {
2875 switch (i->Ain.Div.src->tag) {
2876 case Arm_Mem:
2877 goto bad;
2878 /*FIXME*/
2879 *p++ = 0xF7;
2880 p = doAMode_M_enc(p, subopc, i->Ain.Div.src->Arm.Mem.am);
2881 goto done;
2882 case Arm_Reg:
2883 *p++ = clearWBit(
2884 rexAMode_R_enc_reg(0, i->Ain.Div.src->Arm.Reg.reg));
2885 *p++ = 0xF7;
2886 p = doAMode_R_enc_reg(p, subopc, i->Ain.Div.src->Arm.Reg.reg);
2887 goto done;
2888 default:
2889 goto bad;
2890 }
2891 }
2892 if (i->Ain.Div.sz == 8) {
2893 switch (i->Ain.Div.src->tag) {
2894 case Arm_Mem:
2895 *p++ = rexAMode_M_enc(0, i->Ain.Div.src->Arm.Mem.am);
2896 *p++ = 0xF7;
2897 p = doAMode_M_enc(p, subopc, i->Ain.Div.src->Arm.Mem.am);
2898 goto done;
2899 case Arm_Reg:
2900 *p++ = rexAMode_R_enc_reg(0, i->Ain.Div.src->Arm.Reg.reg);
2901 *p++ = 0xF7;
2902 p = doAMode_R_enc_reg(p, subopc, i->Ain.Div.src->Arm.Reg.reg);
2903 goto done;
2904 default:
2905 goto bad;
2906 }
2907 }
2908 break;
2909
2910 case Ain_Push:
2911 switch (i->Ain.Push.src->tag) {
2912 case Armi_Mem:
2913 *p++ = clearWBit(
2914 rexAMode_M_enc(0, i->Ain.Push.src->Armi.Mem.am));
2915 *p++ = 0xFF;
2916 p = doAMode_M_enc(p, 6, i->Ain.Push.src->Armi.Mem.am);
2917 goto done;
2918 case Armi_Imm:
2919 *p++ = 0x68;
2920 p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32);
2921 goto done;
2922 case Armi_Reg:
2923 *p++ = toUChar(0x40 + (1 & iregEnc3(i->Ain.Push.src->Armi.Reg.reg)));
2924 *p++ = toUChar(0x50 + iregEnc210(i->Ain.Push.src->Armi.Reg.reg));
2925 goto done;
2926 default:
2927 goto bad;
2928 }
2929
2930 case Ain_Call: {
2931 /* As per detailed comment for Ain_Call in getRegUsage_AMD64Instr
2932 above, %r11 is used as an address temporary. */
2933 /* If we don't need to do any fixup actions in the case that the
2934 call doesn't happen, just do the simple thing and emit
2935 straight-line code. This is usually the case. */
2936 if (i->Ain.Call.cond == Acc_ALWAYS/*call always happens*/
2937 || i->Ain.Call.rloc.pri == RLPri_None/*no fixup action*/) {
2938 /* jump over the following two insns if the condition does
2939 not hold */
2940 Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
2941 if (i->Ain.Call.cond != Acc_ALWAYS) {
2942 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
2943 *p++ = shortImm ? 10 : 13;
2944 /* 10 or 13 bytes in the next two insns */
2945 }
2946 if (shortImm) {
2947 /* 7 bytes: movl sign-extend(imm32), %r11 */
2948 *p++ = 0x49;
2949 *p++ = 0xC7;
2950 *p++ = 0xC3;
2951 p = emit32(p, (UInt)i->Ain.Call.target);
2952 } else {
2953 /* 10 bytes: movabsq $target, %r11 */
2954 *p++ = 0x49;
2955 *p++ = 0xBB;
2956 p = emit64(p, i->Ain.Call.target);
2957 }
2958 /* 3 bytes: call *%r11 */
2959 *p++ = 0x41;
2960 *p++ = 0xFF;
2961 *p++ = 0xD3;
2962 } else {
2963 Int delta;
2964 /* Complex case. We have to generate an if-then-else diamond. */
2965 // before:
2966 // j{!cond} else:
2967 // movabsq $target, %r11
2968 // call* %r11
2969 // preElse:
2970 // jmp after:
2971 // else:
2972 // movabsq $0x5555555555555555, %rax // possibly
2973 // movq %rax, %rdx // possibly
2974 // after:
2975
2976 // before:
2977 UChar* pBefore = p;
2978
2979 // j{!cond} else:
2980 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
2981 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2982
2983 // movabsq $target, %r11
2984 *p++ = 0x49;
2985 *p++ = 0xBB;
2986 p = emit64(p, i->Ain.Call.target);
2987
2988 // call* %r11
2989 *p++ = 0x41;
2990 *p++ = 0xFF;
2991 *p++ = 0xD3;
2992
2993 // preElse:
2994 UChar* pPreElse = p;
2995
2996 // jmp after:
2997 *p++ = 0xEB;
2998 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2999
3000 // else:
3001 UChar* pElse = p;
3002
3003 /* Do the 'else' actions */
3004 switch (i->Ain.Call.rloc.pri) {
3005 case RLPri_Int:
3006 // movabsq $0x5555555555555555, %rax
3007 *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
3008 break;
3009 case RLPri_2Int:
3010 goto bad; //ATC
3011 // movabsq $0x5555555555555555, %rax
3012 *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
3013 // movq %rax, %rdx
3014 *p++ = 0x48; *p++ = 0x89; *p++ = 0xC2;
3015 break;
3016 case RLPri_V128SpRel:
3017 if (i->Ain.Call.rloc.spOff == 0) {
3018 // We could accept any |spOff| here, but that's more
3019 // hassle and the only value we're ever going to get
3020 // is zero (I believe.) Hence take the easy path :)
3021 // We need a scratch register -- r11 can be it.
3022 // movabsq $0x5555555555555555, %r11
3023 *p++ = 0x49; *p++ = 0xBB;
3024 p = emit64(p, 0x5555555555555555ULL);
3025 // movq %r11, 0(%rsp)
3026 *p++ = 0x4C; *p++ = 0x89; *p++ = 0x1C; *p++ = 0x24;
3027 // movq %r11, 8(%rsp)
3028 *p++ = 0x4C; *p++ = 0x89; *p++ = 0x5C; *p++ = 0x24;
3029 *p++ = 0x08;
3030 break;
3031 }
3032 goto bad; //ATC for all other spOff values
3033 case RLPri_V256SpRel:
3034 goto bad; //ATC
3035 case RLPri_None: case RLPri_INVALID: default:
3036 vassert(0); // should never get here
3037 }
3038
3039 // after:
3040 UChar* pAfter = p;
3041
3042 // Fix up the branch offsets. The +2s in the offset
3043 // calculations are there because x86 requires conditional
3044 // branches to have their offset stated relative to the
3045 // instruction immediately following the branch insn. And in
3046 // both cases the branch insns are 2 bytes long.
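// (Illustrative, added: a 2-byte "jcc rel8" encoded at address A
// transfers control to A + 2 + rel8, which is why 2 is added to
// pBefore and pPreElse before subtracting.)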
3047
3048 // First, the "j{!cond} else:" at pBefore.
3049 delta = (Int)(Long)(pElse - (pBefore + 2));
3050 vassert(delta >= 0 && delta < 100/*arbitrary*/);
3051 *(pBefore+1) = (UChar)delta;
3052
3053 // And secondly, the "jmp after:" at pPreElse.
3054 delta = (Int)(Long)(pAfter - (pPreElse + 2));
3055 vassert(delta >= 0 && delta < 100/*arbitrary*/);
3056 *(pPreElse+1) = (UChar)delta;
3057 }
3058 goto done;
3059 }
3060
3061 case Ain_XDirect: {
3062 /* NB: what goes on here has to be very closely coordinated with the
3063 chainXDirect_AMD64 and unchainXDirect_AMD64 below. */
3064 /* We're generating chain-me requests here, so we need to be
3065 sure this is actually allowed -- no-redir translations can't
3066 use chain-me's. Hence: */
3067 vassert(disp_cp_chain_me_to_slowEP != NULL);
3068 vassert(disp_cp_chain_me_to_fastEP != NULL);
3069
3070 HReg r11 = hregAMD64_R11();
3071
3072 /* Use ptmp for backpatching conditional jumps. */
3073 ptmp = NULL;
3074
3075 /* First off, if this is conditional, create a conditional
3076 jump over the rest of it. */
3077 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
3078 /* jmp fwds if !condition */
3079 *p++ = toUChar(0x70 + (0xF & (i->Ain.XDirect.cond ^ 1)));
3080 ptmp = p; /* fill in this bit later */
3081 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3082 }
3083
3084 /* Update the guest RIP. */
3085 if (fitsIn32Bits(i->Ain.XDirect.dstGA)) {
3086 /* use a shorter encoding */
3087 /* movl sign-extend(dstGA), %r11 */
3088 *p++ = 0x49;
3089 *p++ = 0xC7;
3090 *p++ = 0xC3;
3091 p = emit32(p, (UInt)i->Ain.XDirect.dstGA);
3092 } else {
3093 /* movabsq $dstGA, %r11 */
3094 *p++ = 0x49;
3095 *p++ = 0xBB;
3096 p = emit64(p, i->Ain.XDirect.dstGA);
3097 }
3098
3099 /* movq %r11, amRIP */
3100 *p++ = rexAMode_M(r11, i->Ain.XDirect.amRIP);
3101 *p++ = 0x89;
3102 p = doAMode_M(p, r11, i->Ain.XDirect.amRIP);
3103
3104 /* --- FIRST PATCHABLE BYTE follows --- */
3105 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
3106 to) backs up the return address, so as to find the address of
3107 the first patchable byte. So: don't change the length of the
3108 two instructions below. */
3109 /* movabsq $disp_cp_chain_me_to_{slow,fast}EP,%r11; */
3110 *p++ = 0x49;
3111 *p++ = 0xBB;
3112 const void* disp_cp_chain_me
3113 = i->Ain.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
3114 : disp_cp_chain_me_to_slowEP;
3115 p = emit64(p, (Addr)disp_cp_chain_me);
3116 /* call *%r11 */
3117 *p++ = 0x41;
3118 *p++ = 0xFF;
3119 *p++ = 0xD3;
3120 /* --- END of PATCHABLE BYTES --- */
3121
3122 /* Fix up the conditional jump, if there was one. */
3123 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
3124 Int delta = p - ptmp;
3125 vassert(delta > 0 && delta < 40);
3126 *ptmp = toUChar(delta-1);
3127 }
3128 goto done;
3129 }
3130
3131 case Ain_XIndir: {
3132 /* We're generating transfers that could lead indirectly to a
3133 chain-me, so we need to be sure this is actually allowed --
3134 no-redir translations are not allowed to reach normal
3135 translations without going through the scheduler. That means
3136 no XDirects or XIndirs out from no-redir translations.
3137 Hence: */
3138 vassert(disp_cp_xindir != NULL);
3139
3140 /* Use ptmp for backpatching conditional jumps. */
3141 ptmp = NULL;
3142
3143 /* First off, if this is conditional, create a conditional
3144 jump over the rest of it. */
3145 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
3146 /* jmp fwds if !condition */
3147 *p++ = toUChar(0x70 + (0xF & (i->Ain.XIndir.cond ^ 1)));
3148 ptmp = p; /* fill in this bit later */
3149 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3150 }
3151
3152 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
3153 *p++ = rexAMode_M(i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
3154 *p++ = 0x89;
3155 p = doAMode_M(p, i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
3156
3157 /* get $disp_cp_xindir into %r11 */
3158 if (fitsIn32Bits((Addr)disp_cp_xindir)) {
3159 /* use a shorter encoding */
3160 /* movl sign-extend(disp_cp_xindir), %r11 */
3161 *p++ = 0x49;
3162 *p++ = 0xC7;
3163 *p++ = 0xC3;
3164 p = emit32(p, (UInt)(Addr)disp_cp_xindir);
3165 } else {
3166 /* movabsq $disp_cp_xindir, %r11 */
3167 *p++ = 0x49;
3168 *p++ = 0xBB;
3169 p = emit64(p, (Addr)disp_cp_xindir);
3170 }
3171
3172 /* jmp *%r11 */
3173 *p++ = 0x41;
3174 *p++ = 0xFF;
3175 *p++ = 0xE3;
3176
3177 /* Fix up the conditional jump, if there was one. */
3178 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
3179 Int delta = p - ptmp;
3180 vassert(delta > 0 && delta < 40);
3181 *ptmp = toUChar(delta-1);
3182 }
3183 goto done;
3184 }
3185
3186 case Ain_XAssisted: {
3187 /* Use ptmp for backpatching conditional jumps. */
3188 ptmp = NULL;
3189
3190 /* First off, if this is conditional, create a conditional
3191 jump over the rest of it. */
3192 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
3193 /* jmp fwds if !condition */
3194 *p++ = toUChar(0x70 + (0xF & (i->Ain.XAssisted.cond ^ 1)));
3195 ptmp = p; /* fill in this bit later */
3196 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3197 }
3198
3199 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
3200 *p++ = rexAMode_M(i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
3201 *p++ = 0x89;
3202 p = doAMode_M(p, i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
3203 /* movl $magic_number, %ebp. Since these numbers are all small positive
3204 integers, we can get away with "movl $N, %ebp" rather than
3205 the longer "movq $N, %rbp". */
3206 UInt trcval = 0;
3207 switch (i->Ain.XAssisted.jk) {
3208 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
3209 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3210 case Ijk_Sys_int32: trcval = VEX_TRC_JMP_SYS_INT32; break;
3211 case Ijk_Sys_int210: trcval = VEX_TRC_JMP_SYS_INT210; break;
3212 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
3213 case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
3214 case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
3215 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
3216 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
3217 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
3218 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
3219 case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
3220 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
3221 /* We don't expect to see the following being assisted. */
3222 case Ijk_Ret:
3223 case Ijk_Call:
3224 /* fallthrough */
3225 default:
3226 ppIRJumpKind(i->Ain.XAssisted.jk);
3227 vpanic("emit_AMD64Instr.Ain_XAssisted: unexpected jump kind");
3228 }
3229 vassert(trcval != 0);
3230 *p++ = 0xBD;
3231 p = emit32(p, trcval);
3232 /* movabsq $disp_cp_xassisted, %r11 */
3233 *p++ = 0x49;
3234 *p++ = 0xBB;
3235 p = emit64(p, (Addr)disp_cp_xassisted);
3236 /* jmp *%r11 */
3237 *p++ = 0x41;
3238 *p++ = 0xFF;
3239 *p++ = 0xE3;
3240
3241 /* Fix up the conditional jump, if there was one. */
3242 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
3243 Int delta = p - ptmp;
3244 vassert(delta > 0 && delta < 40);
3245 *ptmp = toUChar(delta-1);
3246 }
3247 goto done;
3248 }
3249
3250 case Ain_CMov64:
3251 vassert(i->Ain.CMov64.cond != Acc_ALWAYS);
3252 *p++ = rexAMode_R(i->Ain.CMov64.dst, i->Ain.CMov64.src);
3253 *p++ = 0x0F;
3254 *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
3255 p = doAMode_R(p, i->Ain.CMov64.dst, i->Ain.CMov64.src);
3256 goto done;
3257
3258 case Ain_CLoad: {
3259 vassert(i->Ain.CLoad.cond != Acc_ALWAYS);
3260
3261 /* Only 32- or 64-bit variants are allowed. */
3262 vassert(i->Ain.CLoad.szB == 4 || i->Ain.CLoad.szB == 8);
3263
3264 /* Use ptmp for backpatching conditional jumps. */
3265 ptmp = NULL;
3266
3267 /* jmp fwds if !condition */
3268 *p++ = toUChar(0x70 + (0xF & (i->Ain.CLoad.cond ^ 1)));
3269 ptmp = p; /* fill in this bit later */
3270 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3271
3272 /* Now the load. Either a normal 64 bit load or a normal 32 bit
3273 load, which, by the default zero-extension rule, zeroes out
3274 the upper half of the destination, as required. */
3275 rex = rexAMode_M(i->Ain.CLoad.dst, i->Ain.CLoad.addr);
3276 *p++ = i->Ain.CLoad.szB == 4 ? clearWBit(rex) : rex;
3277 *p++ = 0x8B;
3278 p = doAMode_M(p, i->Ain.CLoad.dst, i->Ain.CLoad.addr);
3279
3280 /* Fix up the conditional branch */
3281 Int delta = p - ptmp;
3282 vassert(delta > 0 && delta < 40);
3283 *ptmp = toUChar(delta-1);
3284 goto done;
3285 }
3286
3287 case Ain_CStore: {
3288 /* AFAICS this is identical to Ain_CLoad except that the opcode
3289 is 0x89 instead of 0x8B. */
3290 vassert(i->Ain.CStore.cond != Acc_ALWAYS);
3291
3292 /* Only 32- or 64-bit variants are allowed. */
3293 vassert(i->Ain.CStore.szB == 4 || i->Ain.CStore.szB == 8);
3294
3295 /* Use ptmp for backpatching conditional jumps. */
3296 ptmp = NULL;
3297
3298 /* jmp fwds if !condition */
3299 *p++ = toUChar(0x70 + (0xF & (i->Ain.CStore.cond ^ 1)));
3300 ptmp = p; /* fill in this bit later */
3301 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3302
3303 /* Now the store. */
3304 rex = rexAMode_M(i->Ain.CStore.src, i->Ain.CStore.addr);
3305 *p++ = i->Ain.CStore.szB == 4 ? clearWBit(rex) : rex;
3306 *p++ = 0x89;
3307 p = doAMode_M(p, i->Ain.CStore.src, i->Ain.CStore.addr);
3308
3309 /* Fix up the conditional branch */
3310 Int delta = p - ptmp;
3311 vassert(delta > 0 && delta < 40);
3312 *ptmp = toUChar(delta-1);
3313 goto done;
3314 }
3315
3316 case Ain_MovxLQ:
3317 /* No, _don't_ ask me why the sense of the args has to be
3318 different in the S vs Z case. I don't know. */
3319 if (i->Ain.MovxLQ.syned) {
3320 /* Need REX.W = 1 here, but rexAMode_R does that for us. */
3321 *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
3322 *p++ = 0x63;
3323 p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
3324 } else {
3325 /* Produce a 32-bit reg-reg move, since the implicit
3326 zero-extend does what we want. */
3327 *p++ = clearWBit (
3328 rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst));
3329 *p++ = 0x89;
3330 p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst);
3331 }
3332 goto done;
3333
3334 case Ain_LoadEX:
3335 if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) {
3336 /* movzbq */
3337 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3338 *p++ = 0x0F;
3339 *p++ = 0xB6;
3340 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3341 goto done;
3342 }
3343 if (i->Ain.LoadEX.szSmall == 2 && !i->Ain.LoadEX.syned) {
3344 /* movzwq */
3345 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3346 *p++ = 0x0F;
3347 *p++ = 0xB7;
3348 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3349 goto done;
3350 }
3351 if (i->Ain.LoadEX.szSmall == 4 && !i->Ain.LoadEX.syned) {
3352 /* movzlq */
3353 /* This isn't really an existing AMD64 instruction per se.
3354 Rather, we have to do a 32-bit load. Because a 32-bit
3355 write implicitly clears the upper 32 bits of the target
3356 register, we get what we want. */
3357 *p++ = clearWBit(
3358 rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src));
3359 *p++ = 0x8B;
3360 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3361 goto done;
3362 }
3363 break;
3364
3365 case Ain_Set64:
3366 /* Make the destination register be 1 or 0, depending on whether
3367 the relevant condition holds. Complication: the top 56 bits
3368 of the destination should be forced to zero, but doing 'xorq
3369 %r,%r' kills the flag(s) we are about to read. Sigh. So
3370          start off by moving $0 into the dest. */
3371 reg = iregEnc3210(i->Ain.Set64.dst);
3372 vassert(reg < 16);
3373
3374 /* movq $0, %dst */
3375 *p++ = toUChar(reg >= 8 ? 0x49 : 0x48);
3376 *p++ = 0xC7;
3377 *p++ = toUChar(0xC0 + (reg & 7));
3378 p = emit32(p, 0);
3379
3380 /* setb lo8(%dst) */
3381          /* note, 8-bit register rex trickiness. Be careful here. */
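               /* (Without a REX prefix, r/m encodings 4-7 would select the
                  legacy %ah/%ch/%dh/%bh registers; emitting REX -- even the
                  otherwise-empty 0x40 -- makes them mean %spl/%bpl/%sil/%dil,
                  and 0x41 (REX.B) is needed to reach %r8b..%r15b.) */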
3382 *p++ = toUChar(reg >= 8 ? 0x41 : 0x40);
3383 *p++ = 0x0F;
3384 *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond));
3385 *p++ = toUChar(0xC0 + (reg & 7));
3386 goto done;
3387
3388 case Ain_Bsfr64:
3389 *p++ = rexAMode_R(i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
3390 *p++ = 0x0F;
3391 if (i->Ain.Bsfr64.isFwds) {
3392 *p++ = 0xBC;
3393 } else {
3394 *p++ = 0xBD;
3395 }
3396 p = doAMode_R(p, i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
3397 goto done;
3398
3399 case Ain_MFence:
3400 /* mfence */
3401 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
3402 goto done;
3403
3404 case Ain_ACAS:
3405 /* lock */
3406 *p++ = 0xF0;
3407 if (i->Ain.ACAS.sz == 2) *p++ = 0x66;
3408 /* cmpxchg{b,w,l,q} %rbx,mem. Expected-value in %rax, new value
3409 in %rbx. The new-value register is hardwired to be %rbx
3410 since dealing with byte integer registers is too much hassle,
3411 so we force the register operand to %rbx (could equally be
3412 %rcx or %rdx). */
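               /* Illustrative example (assuming a plain zero-displacement
                  (%rdi) amode): a 4-byte ACAS would come out as
                  F0 40 0F B1 1F, i.e. lock cmpxchgl %ebx,(%rdi); the 0x40 is
                  the redundant REX noted below. */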
3413 rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr );
3414 if (i->Ain.ACAS.sz != 8)
3415 rex = clearWBit(rex);
3416
3417 *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */
3418 *p++ = 0x0F;
3419 if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
3420 p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr);
3421 goto done;
3422
3423 case Ain_DACAS:
3424 /* lock */
3425 *p++ = 0xF0;
3426 /* cmpxchg{8,16}b m{64,128}. Expected-value in %rdx:%rax, new
3427 value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so
3428 aren't encoded in the insn. */
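               /* (Here .sz is the size of each half of the pair: 8 keeps
                  REX.W and gives cmpxchg16b on m128, 4 clears it and gives
                  cmpxchg8b on m64.) */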
3429          rex = rexAMode_M_enc(1, i->Ain.DACAS.addr );
3430          if (i->Ain.DACAS.sz != 8)
3431 rex = clearWBit(rex);
3432 *p++ = rex;
3433 *p++ = 0x0F;
3434 *p++ = 0xC7;
3435 p = doAMode_M_enc(p, 1, i->Ain.DACAS.addr);
3436 goto done;
3437
3438 case Ain_A87Free:
3439 vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7);
3440 for (j = 0; j < i->Ain.A87Free.nregs; j++) {
3441 p = do_ffree_st(p, 7-j);
3442 }
3443 goto done;
3444
3445 case Ain_A87PushPop:
3446 vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4);
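               /* (Opcode summary: D9 /0 = flds m32, DD /0 = fldl m64,
                  D9 /3 = fstps m32, DD /3 = fstpl m64 -- exactly the
                  szB/isPush selection made below.) */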
3447 if (i->Ain.A87PushPop.isPush) {
3448 /* Load from memory into %st(0): flds/fldl amode */
3449 *p++ = clearWBit(
3450 rexAMode_M_enc(0, i->Ain.A87PushPop.addr) );
3451 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
3452 p = doAMode_M_enc(p, 0/*subopcode*/, i->Ain.A87PushPop.addr);
3453 } else {
3454 /* Dump %st(0) to memory: fstps/fstpl amode */
3455 *p++ = clearWBit(
3456 rexAMode_M_enc(3, i->Ain.A87PushPop.addr) );
3457 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
3458 p = doAMode_M_enc(p, 3/*subopcode*/, i->Ain.A87PushPop.addr);
3460 }
3461 goto done;
3462
3463 case Ain_A87FpOp:
3464 switch (i->Ain.A87FpOp.op) {
3465 case Afp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
3466 case Afp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
3467 case Afp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
3468 case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
3469 case Afp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
3470 case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break;
3471 case Afp_ATAN: *p++ = 0xD9; *p++ = 0xF3; break;
3472 case Afp_YL2X: *p++ = 0xD9; *p++ = 0xF1; break;
3473 case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break;
3474 case Afp_PREM: *p++ = 0xD9; *p++ = 0xF8; break;
3475 case Afp_PREM1: *p++ = 0xD9; *p++ = 0xF5; break;
3476 case Afp_TAN:
3477 /* fptan pushes 1.0 on the FP stack, except when the
3478 argument is out of range. Hence we have to do the
3479 instruction, then inspect C2 to see if there is an out
3480 of range condition. If there is, we skip the fincstp
3481 that is used by the in-range case to get rid of this
3482 extra 1.0 value. */
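                  /* (C2 -- the "operand out of range" flag -- is bit 10 of
                     the FPU status word, hence the testw $0x400 mask below.) */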
3483 *p++ = 0xD9; *p++ = 0xF2; // fptan
3484 *p++ = 0x50; // pushq %rax
3485 *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax
3486 *p++ = 0x66; *p++ = 0xA9;
3487 *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax
3488 *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp
3489 *p++ = 0xD9; *p++ = 0xF7; // fincstp
3490 *p++ = 0x58; // after_fincstp: popq %rax
3491 break;
3492 default:
3493 goto bad;
3494 }
3495 goto done;
3496
3497 case Ain_A87LdCW:
3498 *p++ = clearWBit(
3499 rexAMode_M_enc(5, i->Ain.A87LdCW.addr) );
3500 *p++ = 0xD9;
3501 p = doAMode_M_enc(p, 5/*subopcode*/, i->Ain.A87LdCW.addr);
3502 goto done;
3503
3504 case Ain_A87StSW:
3505 *p++ = clearWBit(
3506 rexAMode_M_enc(7, i->Ain.A87StSW.addr) );
3507 *p++ = 0xDD;
3508 p = doAMode_M_enc(p, 7/*subopcode*/, i->Ain.A87StSW.addr);
3509 goto done;
3510
3511 case Ain_Store:
3512 if (i->Ain.Store.sz == 2) {
3513          /* This just goes to show the craziness of the instruction
3514 set encoding. We have to insert two prefix bytes, but be
3515 careful to avoid a conflict in what the size should be, by
3516 ensuring that REX.W = 0. */
3517 *p++ = 0x66; /* override to 16-bits */
3518 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3519 *p++ = 0x89;
3520 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3521 goto done;
3522 }
3523 if (i->Ain.Store.sz == 4) {
3524 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3525 *p++ = 0x89;
3526 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3527 goto done;
3528 }
3529 if (i->Ain.Store.sz == 1) {
3530 /* This is one place where it would be wrong to skip emitting
3531 a rex byte of 0x40, since the mere presence of rex changes
3532 the meaning of the byte register access. Be careful. */
3533 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3534 *p++ = 0x88;
3535 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3536 goto done;
3537 }
3538 break;
3539
3540 case Ain_LdMXCSR:
3541          *p++ = clearWBit(rexAMode_M_enc(2, i->Ain.LdMXCSR.addr));
3542 *p++ = 0x0F;
3543 *p++ = 0xAE;
3544 p = doAMode_M_enc(p, 2/*subopcode*/, i->Ain.LdMXCSR.addr);
3545 goto done;
3546
3547 case Ain_SseUComIS:
3548 /* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */
3549 /* ucomi[sd] %srcL, %srcR */
3550          if (i->Ain.SseUComIS.sz == 8) {
3551             *p++ = 0x66;
3552          } else {
3553             vassert(i->Ain.SseUComIS.sz == 4);
3554             goto bad; /* the 4-byte (ucomiss) case is not handled here */
3555          }
3556 *p++ = clearWBit (
3557 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseUComIS.srcL),
3558 vregEnc3210(i->Ain.SseUComIS.srcR) ));
3559 *p++ = 0x0F;
3560 *p++ = 0x2E;
3561 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseUComIS.srcL),
3562 vregEnc3210(i->Ain.SseUComIS.srcR) );
3563 /* pushfq */
3564 *p++ = 0x9C;
3565 /* popq %dst */
3566 *p++ = toUChar(0x40 + (1 & iregEnc3(i->Ain.SseUComIS.dst)));
3567 *p++ = toUChar(0x58 + iregEnc210(i->Ain.SseUComIS.dst));
3568 goto done;
3569
3570 case Ain_SseSI2SF:
3571          /* cvtsi2s[sd] %src, %dst */
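               /* (F3 0F 2A is cvtsi2ss, F2 0F 2A is cvtsi2sd; REX.W selects
                  a 64-bit integer source, and is cleared below when
                  szS == 4.) */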
3572 rex = rexAMode_R_enc_reg( vregEnc3210(i->Ain.SseSI2SF.dst),
3573 i->Ain.SseSI2SF.src );
3574 *p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2);
3575 *p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex);
3576 *p++ = 0x0F;
3577 *p++ = 0x2A;
3578 p = doAMode_R_enc_reg( p, vregEnc3210(i->Ain.SseSI2SF.dst),
3579 i->Ain.SseSI2SF.src );
3580 goto done;
3581
3582 case Ain_SseSF2SI:
3583          /* cvts[sd]2si %src, %dst */
3584 rex = rexAMode_R_reg_enc( i->Ain.SseSF2SI.dst,
3585 vregEnc3210(i->Ain.SseSF2SI.src) );
3586 *p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2);
3587 *p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex);
3588 *p++ = 0x0F;
3589 *p++ = 0x2D;
3590 p = doAMode_R_reg_enc( p, i->Ain.SseSF2SI.dst,
3591 vregEnc3210(i->Ain.SseSF2SI.src) );
3592 goto done;
3593
3594 case Ain_SseSDSS:
3595 /* cvtsd2ss/cvtss2sd %src, %dst */
3596 *p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3);
3597 *p++ = clearWBit(
3598 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseSDSS.dst),
3599 vregEnc3210(i->Ain.SseSDSS.src) ));
3600 *p++ = 0x0F;
3601 *p++ = 0x5A;
3602 p = doAMode_R_enc_enc( p, vregEnc3210(i->Ain.SseSDSS.dst),
3603 vregEnc3210(i->Ain.SseSDSS.src) );
3604 goto done;
3605
3606 case Ain_SseLdSt:
3607 if (i->Ain.SseLdSt.sz == 8) {
3608 *p++ = 0xF2;
3609 } else
3610 if (i->Ain.SseLdSt.sz == 4) {
3611 *p++ = 0xF3;
3612 } else
3613 if (i->Ain.SseLdSt.sz != 16) {
3614 vassert(0);
3615 }
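               /* (The prefix chosen above selects the variant: F2 0F 10/11 is
                  movsd, F3 0F 10/11 is movss, and no prefix gives the
                  full-width movups for the 16-byte case.) */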
3616 *p++ = clearWBit(
3617 rexAMode_M_enc(vregEnc3210(i->Ain.SseLdSt.reg),
3618 i->Ain.SseLdSt.addr));
3619 *p++ = 0x0F;
3620 *p++ = toUChar(i->Ain.SseLdSt.isLoad ? 0x10 : 0x11);
3621 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseLdSt.reg),
3622 i->Ain.SseLdSt.addr);
3623 goto done;
3624
3625 case Ain_SseCStore: {
3626 vassert(i->Ain.SseCStore.cond != Acc_ALWAYS);
3627
3628 /* Use ptmp for backpatching conditional jumps. */
3629 ptmp = NULL;
3630
3631 /* jmp fwds if !condition */
3632 *p++ = toUChar(0x70 + (0xF & (i->Ain.SseCStore.cond ^ 1)));
3633 ptmp = p; /* fill in this bit later */
3634 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3635
3636 /* Now the store. */
3637 *p++ = clearWBit(
3638 rexAMode_M_enc(vregEnc3210(i->Ain.SseCStore.src),
3639 i->Ain.SseCStore.addr));
3640 *p++ = 0x0F;
3641 *p++ = toUChar(0x11);
3642 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseCStore.src),
3643 i->Ain.SseCStore.addr);
3644
3645 /* Fix up the conditional branch */
3646 Int delta = p - ptmp;
3647 vassert(delta > 0 && delta < 40);
3648 *ptmp = toUChar(delta-1);
3649 goto done;
3650 }
3651
3652 case Ain_SseCLoad: {
3653 vassert(i->Ain.SseCLoad.cond != Acc_ALWAYS);
3654
3655 /* Use ptmp for backpatching conditional jumps. */
3656 ptmp = NULL;
3657
3658 /* jmp fwds if !condition */
3659 *p++ = toUChar(0x70 + (0xF & (i->Ain.SseCLoad.cond ^ 1)));
3660 ptmp = p; /* fill in this bit later */
3661 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3662
3663 /* Now the load. */
3664 *p++ = clearWBit(
3665 rexAMode_M_enc(vregEnc3210(i->Ain.SseCLoad.dst),
3666 i->Ain.SseCLoad.addr));
3667 *p++ = 0x0F;
3668 *p++ = toUChar(0x10);
3669 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseCLoad.dst),
3670 i->Ain.SseCLoad.addr);
3671
3672 /* Fix up the conditional branch */
3673 Int delta = p - ptmp;
3674 vassert(delta > 0 && delta < 40);
3675 *ptmp = toUChar(delta-1);
3676 goto done;
3677 }
3678
3679 case Ain_SseLdzLO:
3680 vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8);
3681 /* movs[sd] amode, %xmm-dst */
3682 *p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
3683 *p++ = clearWBit(
3684 rexAMode_M_enc(vregEnc3210(i->Ain.SseLdzLO.reg),
3685 i->Ain.SseLdzLO.addr));
3686 *p++ = 0x0F;
3687 *p++ = 0x10;
3688 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseLdzLO.reg),
3689 i->Ain.SseLdzLO.addr);
3690 goto done;
3691
3692 case Ain_Sse32Fx4:
3693 xtra = 0;
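               /* (xtra: if bit 8 is set, the low byte is an imm8 emitted
                  after the ModRM byte -- used for the cmpps predicate:
                  0=EQ, 1=LT, 2=LE, 3=UNORD.  The same scheme is used by the
                  three cases that follow.) */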
3694 switch (i->Ain.Sse32Fx4.op) {
3695 case Asse_F2I: *p++ = 0x66; break;
3696 default: break;
3697 }
3698 *p++ = clearWBit(
3699 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse32Fx4.dst),
3700 vregEnc3210(i->Ain.Sse32Fx4.src) ));
3701 *p++ = 0x0F;
3702 switch (i->Ain.Sse32Fx4.op) {
3703 case Asse_ADDF: *p++ = 0x58; break;
3704 case Asse_DIVF: *p++ = 0x5E; break;
3705 case Asse_MAXF: *p++ = 0x5F; break;
3706 case Asse_MINF: *p++ = 0x5D; break;
3707 case Asse_MULF: *p++ = 0x59; break;
3708 case Asse_RCPF: *p++ = 0x53; break;
3709 case Asse_RSQRTF: *p++ = 0x52; break;
3710 case Asse_SQRTF: *p++ = 0x51; break;
3711 case Asse_I2F: *p++ = 0x5B; break; // cvtdq2ps; no 0x66 pfx
3712 case Asse_F2I: *p++ = 0x5B; break; // cvtps2dq; with 0x66 pfx
3713 case Asse_SUBF: *p++ = 0x5C; break;
3714 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3715 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3716 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3717 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3718 default: goto bad;
3719 }
3720 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse32Fx4.dst),
3721 vregEnc3210(i->Ain.Sse32Fx4.src) );
3722 if (xtra & 0x100)
3723 *p++ = toUChar(xtra & 0xFF);
3724 goto done;
3725
3726 case Ain_Sse64Fx2:
3727 xtra = 0;
3728 *p++ = 0x66;
3729 *p++ = clearWBit(
3730 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse64Fx2.dst),
3731 vregEnc3210(i->Ain.Sse64Fx2.src) ));
3732 *p++ = 0x0F;
3733 switch (i->Ain.Sse64Fx2.op) {
3734 case Asse_ADDF: *p++ = 0x58; break;
3735 case Asse_DIVF: *p++ = 0x5E; break;
3736 case Asse_MAXF: *p++ = 0x5F; break;
3737 case Asse_MINF: *p++ = 0x5D; break;
3738 case Asse_MULF: *p++ = 0x59; break;
3739 case Asse_SQRTF: *p++ = 0x51; break;
3740 case Asse_SUBF: *p++ = 0x5C; break;
3741 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3742 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3743 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3744 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3745 default: goto bad;
3746 }
3747 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse64Fx2.dst),
3748 vregEnc3210(i->Ain.Sse64Fx2.src) );
3749 if (xtra & 0x100)
3750 *p++ = toUChar(xtra & 0xFF);
3751 goto done;
3752
3753 case Ain_Sse32FLo:
3754 xtra = 0;
3755 *p++ = 0xF3;
3756 *p++ = clearWBit(
3757 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse32FLo.dst),
3758 vregEnc3210(i->Ain.Sse32FLo.src) ));
3759 *p++ = 0x0F;
3760 switch (i->Ain.Sse32FLo.op) {
3761 case Asse_ADDF: *p++ = 0x58; break;
3762 case Asse_DIVF: *p++ = 0x5E; break;
3763 case Asse_MAXF: *p++ = 0x5F; break;
3764 case Asse_MINF: *p++ = 0x5D; break;
3765 case Asse_MULF: *p++ = 0x59; break;
3766 case Asse_RCPF: *p++ = 0x53; break;
3767 case Asse_RSQRTF: *p++ = 0x52; break;
3768 case Asse_SQRTF: *p++ = 0x51; break;
3769 case Asse_SUBF: *p++ = 0x5C; break;
3770 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3771 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3772 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3773 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3774 default: goto bad;
3775 }
3776 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse32FLo.dst),
3777 vregEnc3210(i->Ain.Sse32FLo.src) );
3778 if (xtra & 0x100)
3779 *p++ = toUChar(xtra & 0xFF);
3780 goto done;
3781
3782 case Ain_Sse64FLo:
3783 xtra = 0;
3784 *p++ = 0xF2;
3785 *p++ = clearWBit(
3786 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse64FLo.dst),
3787 vregEnc3210(i->Ain.Sse64FLo.src) ));
3788 *p++ = 0x0F;
3789 switch (i->Ain.Sse64FLo.op) {
3790 case Asse_ADDF: *p++ = 0x58; break;
3791 case Asse_DIVF: *p++ = 0x5E; break;
3792 case Asse_MAXF: *p++ = 0x5F; break;
3793 case Asse_MINF: *p++ = 0x5D; break;
3794 case Asse_MULF: *p++ = 0x59; break;
3795 case Asse_SQRTF: *p++ = 0x51; break;
3796 case Asse_SUBF: *p++ = 0x5C; break;
3797 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3798 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3799 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3800 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3801 default: goto bad;
3802 }
3803 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse64FLo.dst),
3804 vregEnc3210(i->Ain.Sse64FLo.src) );
3805 if (xtra & 0x100)
3806 *p++ = toUChar(xtra & 0xFF);
3807 goto done;
3808
3809 case Ain_SseReRg:
3810 # define XX(_n) *p++ = (_n)
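               /* (Most of the ops below are the 66 0F xx forms -- the MMX
                  integer opcodes promoted to XMM by the 0x66 prefix.  The
                  handful without 0x66 (MOV/OR/XOR/AND/ANDN) are the
                  prefix-free movups/orps/xorps/andps/andnps encodings.) */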
3811
3812 rex = clearWBit(
3813 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseReRg.dst),
3814 vregEnc3210(i->Ain.SseReRg.src) ));
3815
3816 switch (i->Ain.SseReRg.op) {
3817 case Asse_MOV: /*movups*/ XX(rex); XX(0x0F); XX(0x10); break;
3818 case Asse_OR: XX(rex); XX(0x0F); XX(0x56); break;
3819 case Asse_XOR: XX(rex); XX(0x0F); XX(0x57); break;
3820 case Asse_AND: XX(rex); XX(0x0F); XX(0x54); break;
3821 case Asse_ANDN: XX(rex); XX(0x0F); XX(0x55); break;
3822 case Asse_PACKSSD: XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break;
3823 case Asse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63); break;
3824 case Asse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67); break;
3825 case Asse_ADD8: XX(0x66); XX(rex); XX(0x0F); XX(0xFC); break;
3826 case Asse_ADD16: XX(0x66); XX(rex); XX(0x0F); XX(0xFD); break;
3827 case Asse_ADD32: XX(0x66); XX(rex); XX(0x0F); XX(0xFE); break;
3828 case Asse_ADD64: XX(0x66); XX(rex); XX(0x0F); XX(0xD4); break;
3829 case Asse_QADD8S: XX(0x66); XX(rex); XX(0x0F); XX(0xEC); break;
3830 case Asse_QADD16S: XX(0x66); XX(rex); XX(0x0F); XX(0xED); break;
3831 case Asse_QADD8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDC); break;
3832 case Asse_QADD16U: XX(0x66); XX(rex); XX(0x0F); XX(0xDD); break;
3833 case Asse_AVG8U: XX(0x66); XX(rex); XX(0x0F); XX(0xE0); break;
3834 case Asse_AVG16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE3); break;
3835 case Asse_CMPEQ8: XX(0x66); XX(rex); XX(0x0F); XX(0x74); break;
3836 case Asse_CMPEQ16: XX(0x66); XX(rex); XX(0x0F); XX(0x75); break;
3837 case Asse_CMPEQ32: XX(0x66); XX(rex); XX(0x0F); XX(0x76); break;
3838 case Asse_CMPGT8S: XX(0x66); XX(rex); XX(0x0F); XX(0x64); break;
3839 case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); break;
3840 case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); break;
3841 case Asse_MAX16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEE); break;
3842 case Asse_MAX8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDE); break;
3843 case Asse_MIN16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEA); break;
3844 case Asse_MIN8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDA); break;
3845 case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); break;
3846 case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); break;
3847 case Asse_MUL16: XX(0x66); XX(rex); XX(0x0F); XX(0xD5); break;
3848 case Asse_SHL16: XX(0x66); XX(rex); XX(0x0F); XX(0xF1); break;
3849 case Asse_SHL32: XX(0x66); XX(rex); XX(0x0F); XX(0xF2); break;
3850 case Asse_SHL64: XX(0x66); XX(rex); XX(0x0F); XX(0xF3); break;
3851 case Asse_SAR16: XX(0x66); XX(rex); XX(0x0F); XX(0xE1); break;
3852 case Asse_SAR32: XX(0x66); XX(rex); XX(0x0F); XX(0xE2); break;
3853 case Asse_SHR16: XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break;
3854 case Asse_SHR32: XX(0x66); XX(rex); XX(0x0F); XX(0xD2); break;
3855 case Asse_SHR64: XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break;
3856 case Asse_SUB8: XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break;
3857 case Asse_SUB16: XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break;
3858 case Asse_SUB32: XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break;
3859 case Asse_SUB64: XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break;
3860 case Asse_QSUB8S: XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break;
3861 case Asse_QSUB16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break;
3862 case Asse_QSUB8U: XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break;
3863 case Asse_QSUB16U: XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break;
3864 case Asse_UNPCKHB: XX(0x66); XX(rex); XX(0x0F); XX(0x68); break;
3865 case Asse_UNPCKHW: XX(0x66); XX(rex); XX(0x0F); XX(0x69); break;
3866 case Asse_UNPCKHD: XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break;
3867 case Asse_UNPCKHQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break;
3868 case Asse_UNPCKLB: XX(0x66); XX(rex); XX(0x0F); XX(0x60); break;
3869 case Asse_UNPCKLW: XX(0x66); XX(rex); XX(0x0F); XX(0x61); break;
3870 case Asse_UNPCKLD: XX(0x66); XX(rex); XX(0x0F); XX(0x62); break;
3871 case Asse_UNPCKLQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break;
3872 case Asse_PSHUFB: XX(0x66); XX(rex);
3873 XX(0x0F); XX(0x38); XX(0x00); break;
3874 default: goto bad;
3875 }
3876 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseReRg.dst),
3877 vregEnc3210(i->Ain.SseReRg.src) );
3878 # undef XX
3879 goto done;
3880
3881 case Ain_SseCMov:
3882 /* jmp fwds if !condition */
3883 *p++ = toUChar(0x70 + (i->Ain.SseCMov.cond ^ 1));
3884 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
3885 ptmp = p;
3886
3887 /* movaps %src, %dst */
3888 *p++ = clearWBit(
3889 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseCMov.dst),
3890 vregEnc3210(i->Ain.SseCMov.src) ));
3891 *p++ = 0x0F;
3892 *p++ = 0x28;
3893 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseCMov.dst),
3894 vregEnc3210(i->Ain.SseCMov.src) );
3895
3896 /* Fill in the jump offset. */
3897 *(ptmp-1) = toUChar(p - ptmp);
3898 goto done;
3899
3900 case Ain_SseShuf:
3901 *p++ = 0x66;
3902 *p++ = clearWBit(
3903 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseShuf.dst),
3904 vregEnc3210(i->Ain.SseShuf.src) ));
3905 *p++ = 0x0F;
3906 *p++ = 0x70;
3907 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseShuf.dst),
3908 vregEnc3210(i->Ain.SseShuf.src) );
3909 *p++ = (UChar)(i->Ain.SseShuf.order);
3910 goto done;
3911
3912 case Ain_SseShiftN: {
3913 opc = 0; // invalid
3914 subopc_imm = 0; // invalid
3915 UInt limit = 0;
3916 UInt shiftImm = i->Ain.SseShiftN.shiftBits;
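               /* (The whole-register shifts SHL128/SHR128 -- pslldq/psrldq --
                  take a byte count, hence the divide-by-8 below; the element
                  shifts take a bit count bounded by the element size.) */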
3917 switch (i->Ain.SseShiftN.op) {
3918 case Asse_SHL16: limit = 15; opc = 0x71; subopc_imm = 6; break;
3919 case Asse_SHL32: limit = 31; opc = 0x72; subopc_imm = 6; break;
3920 case Asse_SHL64: limit = 63; opc = 0x73; subopc_imm = 6; break;
3921 case Asse_SAR16: limit = 15; opc = 0x71; subopc_imm = 4; break;
3922 case Asse_SAR32: limit = 31; opc = 0x72; subopc_imm = 4; break;
3923 case Asse_SHR16: limit = 15; opc = 0x71; subopc_imm = 2; break;
3924 case Asse_SHR32: limit = 31; opc = 0x72; subopc_imm = 2; break;
3925 case Asse_SHR64: limit = 63; opc = 0x73; subopc_imm = 2; break;
3926 case Asse_SHL128:
3927 if ((shiftImm & 7) != 0) goto bad;
3928 shiftImm >>= 3;
3929 limit = 15; opc = 0x73; subopc_imm = 7;
3930 break;
3931 case Asse_SHR128:
3932 if ((shiftImm & 7) != 0) goto bad;
3933 shiftImm >>= 3;
3934 limit = 15; opc = 0x73; subopc_imm = 3;
3935 break;
3936 default:
3937 // This should never happen .. SSE2 only offers the above 10 insns
3938 // for the "shift with immediate" case
3939 goto bad;
3940 }
3941 vassert(limit > 0 && opc > 0 && subopc_imm > 0);
3942 if (shiftImm > limit) goto bad;
3943 *p++ = 0x66;
3944 *p++ = clearWBit(
3945 rexAMode_R_enc_enc( subopc_imm,
3946 vregEnc3210(i->Ain.SseShiftN.dst) ));
3947 *p++ = 0x0F;
3948 *p++ = opc;
3949 p = doAMode_R_enc_enc(p, subopc_imm, vregEnc3210(i->Ain.SseShiftN.dst));
3950 *p++ = shiftImm;
3951 goto done;
3952 }
3953
3954 case Ain_SseMOVQ: {
3955 Bool toXMM = i->Ain.SseMOVQ.toXMM;
3956 HReg gpr = i->Ain.SseMOVQ.gpr;
3957 HReg xmm = i->Ain.SseMOVQ.xmm;
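               /* (66 REX.W 0F 6E is movq r64->xmm; 66 REX.W 0F 7E is movq
                  xmm->r64.  setWBit forces REX.W so the full 64 bits move.) */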
3958 *p++ = 0x66;
3959 *p++ = setWBit( rexAMode_R_enc_enc( vregEnc3210(xmm), iregEnc3210(gpr)) );
3960 *p++ = 0x0F;
3961 *p++ = toXMM ? 0x6E : 0x7E;
3962 p = doAMode_R_enc_enc( p, vregEnc3210(xmm), iregEnc3210(gpr) );
3963 goto done;
3964 }
3965
3966 //uu case Ain_AvxLdSt: {
3967 //uu UInt vex = vexAMode_M( dvreg2ireg(i->Ain.AvxLdSt.reg),
3968 //uu i->Ain.AvxLdSt.addr );
3969 //uu p = emitVexPrefix(p, vex);
3970 //uu *p++ = toUChar(i->Ain.AvxLdSt.isLoad ? 0x10 : 0x11);
3971 //uu p = doAMode_M(p, dvreg2ireg(i->Ain.AvxLdSt.reg), i->Ain.AvxLdSt.addr);
3972 //uu goto done;
3973 //uu }
3974
3975 case Ain_EvCheck: {
3976 /* We generate:
3977 (3 bytes) decl 8(%rbp) 8 == offsetof(host_EvC_COUNTER)
3978 (2 bytes) jns nofail expected taken
3979 (3 bytes) jmp* 0(%rbp) 0 == offsetof(host_EvC_FAILADDR)
3980 nofail:
3981 */
3982 /* This is heavily asserted re instruction lengths. It needs to
3983          be.  If we are given unexpected forms of .amCounter or
3984          .amFailAddr -- basically, anything that's not of the form
3985          uimm7(%rbp) -- the assertions here are likely to fail. */
3986 /* Note also that after the decl we must be very careful not to
3987 read the carry flag, else we get a partial flags stall.
3988 js/jns avoids that, though. */
3989 UChar* p0 = p;
3990 /* --- decl 8(%rbp) --- */
3991 /* Need to compute the REX byte for the decl in order to prove
3992          that we don't need it, since this is a 32-bit decrement and all
3993 registers involved in the amode are < r8. "1" because
3994 there's no register in this encoding; instead the register
3995 field is used as a sub opcode. The encoding for "decl r/m32"
3996 is FF /1, hence the "1". */
3997 rex = clearWBit(rexAMode_M_enc(1, i->Ain.EvCheck.amCounter));
3998 if (rex != 0x40) goto bad; /* We don't expect to need the REX byte. */
3999 *p++ = 0xFF;
4000 p = doAMode_M_enc(p, 1, i->Ain.EvCheck.amCounter);
4001 vassert(p - p0 == 3);
4002 /* --- jns nofail --- */
4003 *p++ = 0x79;
4004 *p++ = 0x03; /* need to check this 0x03 after the next insn */
4005 vassert(p - p0 == 5);
4006 /* --- jmp* 0(%rbp) --- */
4007 /* Once again, verify we don't need REX. The encoding is FF /4.
4008 We don't need REX.W since by default FF /4 in 64-bit mode
4009 implies a 64 bit load. */
4010 rex = clearWBit(rexAMode_M_enc(4, i->Ain.EvCheck.amFailAddr));
4011 if (rex != 0x40) goto bad;
4012 *p++ = 0xFF;
4013 p = doAMode_M_enc(p, 4, i->Ain.EvCheck.amFailAddr);
4014 vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
4015 /* And crosscheck .. */
4016 vassert(evCheckSzB_AMD64() == 8);
4017 goto done;
4018 }
4019
4020 case Ain_ProfInc: {
4021 /* We generate movabsq $0, %r11
4022 incq (%r11)
4023 in the expectation that a later call to LibVEX_patchProfCtr
4024 will be used to fill in the immediate field once the right
4025 value is known.
4026 49 BB 00 00 00 00 00 00 00 00
4027 49 FF 03
4028 */
4029 *p++ = 0x49; *p++ = 0xBB;
4030 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
4031 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
4032 *p++ = 0x49; *p++ = 0xFF; *p++ = 0x03;
4033 /* Tell the caller .. */
4034 vassert(!(*is_profInc));
4035 *is_profInc = True;
4036 goto done;
4037 }
4038
4039 default:
4040 goto bad;
4041 }
4042
4043 bad:
4044 ppAMD64Instr(i, mode64);
4045 vpanic("emit_AMD64Instr");
4046 /*NOTREACHED*/
4047
4048 done:
4049 vassert(p - &buf[0] <= 64);
4050 return p - &buf[0];
4051 }
4052
4053
4054 /* How big is an event check? See case for Ain_EvCheck in
4055 emit_AMD64Instr just above. That crosschecks what this returns, so
4056 we can tell if we're inconsistent. */
4057 Int evCheckSzB_AMD64 (void)
4058 {
4059 return 8;
4060 }
4061
4062
4063 /* NB: what goes on here has to be very closely coordinated with the
4064 emitInstr case for XDirect, above. */
4065 VexInvalRange chainXDirect_AMD64 ( VexEndness endness_host,
4066 void* place_to_chain,
4067 const void* disp_cp_chain_me_EXPECTED,
4068 const void* place_to_jump_to )
4069 {
4070 vassert(endness_host == VexEndnessLE);
4071
4072 /* What we're expecting to see is:
4073 movabsq $disp_cp_chain_me_EXPECTED, %r11
4074 call *%r11
4075 viz
4076 49 BB <8 bytes value == disp_cp_chain_me_EXPECTED>
4077 41 FF D3
4078 */
4079 UChar* p = (UChar*)place_to_chain;
4080 vassert(p[0] == 0x49);
4081 vassert(p[1] == 0xBB);
4082 vassert(read_misaligned_ULong_LE(&p[2]) == (Addr)disp_cp_chain_me_EXPECTED);
4083 vassert(p[10] == 0x41);
4084 vassert(p[11] == 0xFF);
4085 vassert(p[12] == 0xD3);
4086 /* And what we want to change it to is either:
4087 (general case):
4088 movabsq $place_to_jump_to, %r11
4089 jmpq *%r11
4090 viz
4091 49 BB <8 bytes value == place_to_jump_to>
4092 41 FF E3
4093 So it's the same length (convenient, huh) and we don't
4094 need to change all the bits.
4095 ---OR---
4096 in the case where the displacement falls within 32 bits
4097 jmpq disp32 where disp32 is relative to the next insn
4098 ud2; ud2; ud2; ud2
4099 viz
4100 E9 <4 bytes == disp32>
4101 0F 0B 0F 0B 0F 0B 0F 0B
4102
4103 In both cases the replacement has the same length as the original.
4104 To remain sane & verifiable,
4105 (1) limit the displacement for the short form to
4106 (say) +/- one billion, so as to avoid wraparound
4107 off-by-ones
4108 (2) even if the short form is applicable, once every (say)
4109 1024 times use the long form anyway, so as to maintain
4110 verifiability
4111 */
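      /* (Worked example, purely illustrative: if place_to_jump_to is 0x200
         bytes beyond p, delta = 0x200 - 5 = 0x1FB, which fits in 32 bits, so
         the short form E9 FB 01 00 00 followed by four ud2s would be used --
         unless the 1-in-1024 long-form override below kicks in.) */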
4112 /* This is the delta we need to put into a JMP d32 insn. It's
4113 relative to the start of the next insn, hence the -5. */
4114 Long delta = (Long)((const UChar *)place_to_jump_to - (const UChar*)p) - 5;
4115 Bool shortOK = delta >= -1000*1000*1000 && delta < 1000*1000*1000;
4116
4117 static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
4118 if (shortOK) {
4119 shortCTR++; // thread safety bleh
4120 if (0 == (shortCTR & 0x3FF)) {
4121 shortOK = False;
4122 if (0)
4123 vex_printf("QQQ chainXDirect_AMD64: shortCTR = %u, "
4124 "using long jmp\n", shortCTR);
4125 }
4126 }
4127
4128 /* And make the modifications. */
4129 if (shortOK) {
4130 p[0] = 0xE9;
4131 write_misaligned_UInt_LE(&p[1], (UInt)(Int)delta);
4132 p[5] = 0x0F; p[6] = 0x0B;
4133 p[7] = 0x0F; p[8] = 0x0B;
4134 p[9] = 0x0F; p[10] = 0x0B;
4135 p[11] = 0x0F; p[12] = 0x0B;
4136 /* sanity check on the delta -- top 32 are all 0 or all 1 */
4137 delta >>= 32;
4138 vassert(delta == 0LL || delta == -1LL);
4139 } else {
4140 /* Minimal modifications from the starting sequence. */
4141 write_misaligned_ULong_LE(&p[2], (ULong)(Addr)place_to_jump_to);
4142 p[12] = 0xE3;
4143 }
4144 VexInvalRange vir = { (HWord)place_to_chain, 13 };
4145 return vir;
4146 }
4147
4148
4149 /* NB: what goes on here has to be very closely coordinated with the
4150 emitInstr case for XDirect, above. */
4151 VexInvalRange unchainXDirect_AMD64 ( VexEndness endness_host,
4152 void* place_to_unchain,
4153 const void* place_to_jump_to_EXPECTED,
4154 const void* disp_cp_chain_me )
4155 {
4156 vassert(endness_host == VexEndnessLE);
4157
4158 /* What we're expecting to see is either:
4159 (general case)
4160 movabsq $place_to_jump_to_EXPECTED, %r11
4161 jmpq *%r11
4162 viz
4163 49 BB <8 bytes value == place_to_jump_to_EXPECTED>
4164 41 FF E3
4165 ---OR---
4166 in the case where the displacement falls within 32 bits
4167 jmpq d32
4168 ud2; ud2; ud2; ud2
4169 viz
4170 E9 <4 bytes == disp32>
4171 0F 0B 0F 0B 0F 0B 0F 0B
4172 */
4173 UChar* p = (UChar*)place_to_unchain;
4174 Bool valid = False;
4175 if (p[0] == 0x49 && p[1] == 0xBB
4176 && read_misaligned_ULong_LE(&p[2])
4177 == (ULong)(Addr)place_to_jump_to_EXPECTED
4178 && p[10] == 0x41 && p[11] == 0xFF && p[12] == 0xE3) {
4179 /* it's the long form */
4180 valid = True;
4181 }
4182 else
4183 if (p[0] == 0xE9
4184 && p[5] == 0x0F && p[6] == 0x0B
4185 && p[7] == 0x0F && p[8] == 0x0B
4186 && p[9] == 0x0F && p[10] == 0x0B
4187 && p[11] == 0x0F && p[12] == 0x0B) {
4188 /* It's the short form. Check the offset is right. */
4189 Int s32 = (Int)read_misaligned_UInt_LE(&p[1]);
4190 Long s64 = (Long)s32;
4191 if ((UChar*)p + 5 + s64 == place_to_jump_to_EXPECTED) {
4192 valid = True;
4193 if (0)
4194 vex_printf("QQQ unchainXDirect_AMD64: found short form\n");
4195 }
4196 }
4197 vassert(valid);
4198 /* And what we want to change it to is:
4199 movabsq $disp_cp_chain_me, %r11
4200 call *%r11
4201 viz
4202 49 BB <8 bytes value == disp_cp_chain_me>
4203 41 FF D3
4204 So it's the same length (convenient, huh).
4205 */
4206 p[0] = 0x49;
4207 p[1] = 0xBB;
4208 write_misaligned_ULong_LE(&p[2], (ULong)(Addr)disp_cp_chain_me);
4209 p[10] = 0x41;
4210 p[11] = 0xFF;
4211 p[12] = 0xD3;
4212 VexInvalRange vir = { (HWord)place_to_unchain, 13 };
4213 return vir;
4214 }
4215
4216
4217 /* Patch the counter address into a profile inc point, as previously
4218 created by the Ain_ProfInc case for emit_AMD64Instr. */
4219 VexInvalRange patchProfInc_AMD64 ( VexEndness endness_host,
4220 void* place_to_patch,
4221 const ULong* location_of_counter )
4222 {
4223 vassert(endness_host == VexEndnessLE);
4224 vassert(sizeof(ULong*) == 8);
4225 UChar* p = (UChar*)place_to_patch;
4226 vassert(p[0] == 0x49);
4227 vassert(p[1] == 0xBB);
4228 vassert(p[2] == 0x00);
4229 vassert(p[3] == 0x00);
4230 vassert(p[4] == 0x00);
4231 vassert(p[5] == 0x00);
4232 vassert(p[6] == 0x00);
4233 vassert(p[7] == 0x00);
4234 vassert(p[8] == 0x00);
4235 vassert(p[9] == 0x00);
4236 vassert(p[10] == 0x49);
4237 vassert(p[11] == 0xFF);
4238 vassert(p[12] == 0x03);
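      /* (Store the counter address little-endian into the movabsq's 8-byte
         immediate field, i.e. bytes 2..9 of the 13-byte patch site.) */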
4239 ULong imm64 = (ULong)(Addr)location_of_counter;
4240 p[2] = imm64 & 0xFF; imm64 >>= 8;
4241 p[3] = imm64 & 0xFF; imm64 >>= 8;
4242 p[4] = imm64 & 0xFF; imm64 >>= 8;
4243 p[5] = imm64 & 0xFF; imm64 >>= 8;
4244 p[6] = imm64 & 0xFF; imm64 >>= 8;
4245 p[7] = imm64 & 0xFF; imm64 >>= 8;
4246 p[8] = imm64 & 0xFF; imm64 >>= 8;
4247 p[9] = imm64 & 0xFF; imm64 >>= 8;
4248 VexInvalRange vir = { (HWord)place_to_patch, 13 };
4249 return vir;
4250 }
4251
4252
4253 /*---------------------------------------------------------------*/
4254 /*--- end host_amd64_defs.c ---*/
4255 /*---------------------------------------------------------------*/
4256