1 /*
2 * HT Editor
3 * x86asm.cc
4 *
5 * Copyright (C) 1999-2002 Stefan Weyergraf
6 * Copyright (C) 2005-2007 Sebastian Biallas (sb@biallas.net)
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21
22 #include <stdlib.h>
23 #include <string.h>
24
25 #include "x86asm.h"
26 #include "snprintf.h"
27 #include "strtools.h"
28
/*
 * Opcode-map selectors passed as the `prefix` argument of encode_insn().
 * The names spell the escape bytes of the map.  encode_insn() relies on
 * X86ASM_PREFIX_D8 .. X86ASM_PREFIX_DF being consecutive: it derives the
 * opcode byte as 0xd8 + (prefix - X86ASM_PREFIX_D8).
 */
enum {
	X86ASM_PREFIX_NO,	/* no escape byte */
	X86ASM_PREFIX_0F,	/* 0f */
	X86ASM_PREFIX_F20F,	/* f2 0f */
	X86ASM_PREFIX_F30F,	/* f3 0f */
	X86ASM_PREFIX_0F0F,	/* 0f 0f */
	X86ASM_PREFIX_0F38,	/* 0f 38 */
	X86ASM_PREFIX_660F38,	/* 0f 38, selected when an opsize prefix is given */
	X86ASM_PREFIX_F20F38,	/* f2 0f 38 */
	X86ASM_PREFIX_0F3A,	/* 0f 3a */
	X86ASM_PREFIX_660F3A,	/* 0f 3a, selected when an opsize prefix is given */
	X86ASM_PREFIX_0F7A,	/* 0f 7a */
	X86ASM_PREFIX_0F7B,	/* 0f 7b */
	X86ASM_PREFIX_0F24,	/* 0f 24 */
	X86ASM_PREFIX_0F25,	/* 0f 25 */
	/* the following eight must stay contiguous and in this order */
	X86ASM_PREFIX_D8,
	X86ASM_PREFIX_D9,
	X86ASM_PREFIX_DA,
	X86ASM_PREFIX_DB,
	X86ASM_PREFIX_DC,
	X86ASM_PREFIX_DD,
	X86ASM_PREFIX_DE,
	X86ASM_PREFIX_DF,
};
53
/* Error message texts (some are printf-style format strings). */
#define X86ASM_ERRMSG_AMBIGUOUS		"ambiguous command"
#define X86ASM_ERRMSG_UNKNOWN_COMMAND	"unknown command '%s'"
#define X86ASM_ERRMSG_INVALID_PREFIX	"invalid prefix"
#define X86ASM_ERRMSG_UNKNOWN_SYMBOL	"unknown symbol '%s'"
#define X86ASM_ERRMSG_INVALID_OPERANDS	"invalid operand(s)"
#define X86ASM_ERRMSG_INTERNAL		"internal error: "
60
/*
 * Values OR-ed into `rexprefix`.  Each one carries the 0x40 marker bit
 * (tested by encode_insn() to decide whether a REX byte is written) plus
 * one of the low four flag bits.
 */
#define rexw	0x48	/* 0x40 | bit 3 */
#define rexr	0x44	/* 0x40 | bit 2 */
#define rexx	0x42	/* 0x40 | bit 1 */
#define rexb	0x41	/* 0x40 | bit 0 */
65
66 static const x86addrcoding modrm16[3][8] = {
67 /* mod = 0 */
68 {
69 {X86_REG_BX, X86_REG_SI, 0},
70 {X86_REG_BX, X86_REG_DI, 0},
71 {X86_REG_BP, X86_REG_SI, 0},
72 {X86_REG_BP, X86_REG_DI, 0},
73 {X86_REG_SI, X86_REG_NO, 0},
74 {X86_REG_DI, X86_REG_NO, 0},
75 {X86_REG_NO, X86_REG_NO, 2},
76 {X86_REG_BX, X86_REG_NO, 0}
77 },
78 /* mod = 1 */
79 {
80 {X86_REG_BX, X86_REG_SI, 1},
81 {X86_REG_BX, X86_REG_DI, 1},
82 {X86_REG_BP, X86_REG_SI, 1},
83 {X86_REG_BP, X86_REG_DI, 1},
84 {X86_REG_SI, X86_REG_NO, 1},
85 {X86_REG_DI, X86_REG_NO, 1},
86 {X86_REG_BP, X86_REG_NO, 1},
87 {X86_REG_BX, X86_REG_NO, 1}
88 },
89 /* mod = 2 */
90 {
91 {X86_REG_BX, X86_REG_SI, 2},
92 {X86_REG_BX, X86_REG_DI, 2},
93 {X86_REG_BP, X86_REG_SI, 2},
94 {X86_REG_BP, X86_REG_DI, 2},
95 {X86_REG_SI, X86_REG_NO, 2},
96 {X86_REG_DI, X86_REG_NO, 2},
97 {X86_REG_BP, X86_REG_NO, 2},
98 {X86_REG_BX, X86_REG_NO, 2}
99 }
100 };
101
102 static const x86addrcoding modrm32[3][8] = {
103 /* mod = 0 */
104 {
105 {X86_REG_AX, X86_REG_NO, 0},
106 {X86_REG_CX, X86_REG_NO, 0},
107 {X86_REG_DX, X86_REG_NO, 0},
108 {X86_REG_BX, X86_REG_NO, 0},
109 {X86_REG_INVALID, X86_REG_INVALID, -1}, /* special: SIB */
110 {X86_REG_NO, X86_REG_NO, 4},
111 {X86_REG_SI, X86_REG_NO, 0},
112 {X86_REG_DI, X86_REG_NO, 0}
113 },
114 /* mod = 1 */
115 {
116 {X86_REG_AX, X86_REG_NO, 1},
117 {X86_REG_CX, X86_REG_NO, 1},
118 {X86_REG_DX, X86_REG_NO, 1},
119 {X86_REG_BX, X86_REG_NO, 1},
120 {X86_REG_INVALID, X86_REG_INVALID, -1}, /* special: SIB + disp8 */
121 {X86_REG_BP, X86_REG_NO, 1},
122 {X86_REG_SI, X86_REG_NO, 1},
123 {X86_REG_DI, X86_REG_NO, 1}
124 },
125 /* mod = 2 */
126 {
127 {X86_REG_AX, X86_REG_NO, 4},
128 {X86_REG_CX, X86_REG_NO, 4},
129 {X86_REG_DX, X86_REG_NO, 4},
130 {X86_REG_BX, X86_REG_NO, 4},
131 {X86_REG_INVALID, X86_REG_INVALID, -1}, /* special: SIB + disp32 */
132 {X86_REG_BP, X86_REG_NO, 4},
133 {X86_REG_SI, X86_REG_NO, 4},
134 {X86_REG_DI, X86_REG_NO, 4}
135 }
136 };
137
138 /* convert logical operand types to hardware operand types */
139 static const byte lop2hop[12][9] = {
140 /* X86_OPTYPE_EMPTY */
141 {},
142 /* X86_OPTYPE_IMM */
143 {TYPE_I, TYPE_Is, TYPE_J, TYPE_A, TYPE_Ix, TYPE_I4},
144 /* X86_OPTYPE_REG */
145 {TYPE_R, TYPE_Rx, TYPE_RXx, TYPE_G, TYPE_E, TYPE_MR, TYPE_RV},
146 /* X86_OPTYPE_SEG */
147 {TYPE_S, TYPE_Sx},
148 /* X86_OPTYPE_MEM */
149 {TYPE_E, TYPE_M, TYPE_MR, TYPE_O, TYPE_Q, TYPE_W, TYPE_VS, TYPE_X},
150 /* X86_OPTYPE_CRX */
151 {TYPE_C},
152 /* X86_OPTYPE_DRX */
153 {TYPE_D},
154 /* X86_OPTYPE_STX */
155 {TYPE_F, TYPE_Fx},
156 /* X86_OPTYPE_MMX */
157 {TYPE_P, TYPE_Q, TYPE_PR},
158 /* X86_OPTYPE_XMM */
159 {TYPE_V, TYPE_W, TYPE_VR, TYPE_Vx, TYPE_VV, TYPE_VI, TYPE_VS, TYPE_VD},
160 /* X86_OPTYPE_YMM */
161 {TYPE_Y, TYPE_X, TYPE_YR, TYPE_YV, TYPE_YI},
162 /* X86_OPTYPE_FARPTR */
163 {},
164 };
165
166 static const char immhsz8_16[] = { SIZE_B, SIZE_BV, SIZE_W, SIZE_V, SIZE_VV, 0 };
167 static const char immhsz16_16[] = { SIZE_W, SIZE_V, SIZE_VV, 0 };
168 static const char immhsz32_16[] = { 0 };
169 static const char immhsz64_16[] = { 0 };
170
171 static const char immhsz8_32[] = { SIZE_B, SIZE_BV, SIZE_W, SIZE_V, SIZE_VV, 0 };
172 static const char immhsz16_32[] = { SIZE_W, SIZE_V, SIZE_VV, 0 };
173 static const char immhsz32_32[] = { SIZE_V, SIZE_VV, 0 };
174 static const char immhsz64_32[] = { 0 };
175
176 static const char immhsz8_64[] = { SIZE_B, SIZE_BV, SIZE_W, SIZE_V, SIZE_VV, 0 };
177 static const char immhsz16_64[] = { SIZE_W, SIZE_V, SIZE_VV, 0 };
178 static const char immhsz32_64[] = { SIZE_V, SIZE_VV, 0 };
179 static const char immhsz64_64[] = { SIZE_V, 0 };
180
181 static const char hsz8_16[] = { SIZE_B, 0 };
182 static const char hsz16_16[] = { SIZE_W, SIZE_V, SIZE_VV, 0 };
183 static const char hsz32_16[] = { SIZE_D, SIZE_P, SIZE_Z, SIZE_R, 0 };
184 static const char hsz48_16[] = { 0 };
185 static const char hsz64_16[] = { SIZE_Q, SIZE_U, SIZE_Z, 0};
186 static const char hsz128_16[] = { SIZE_O, SIZE_U, 0};
187 static const char hsz256_16[] = { SIZE_Y, 0};
188
189 static const char hsz8_32[] = { SIZE_B, 0 };
190 static const char hsz16_32[] = { SIZE_W, 0 };
191 static const char hsz32_32[] = { SIZE_D, SIZE_V, SIZE_VV, SIZE_R, SIZE_Z, 0 };
192 static const char hsz48_32[] = { SIZE_P, 0 };
193 static const char hsz64_32[] = { SIZE_Q, SIZE_U, SIZE_Z, 0};
194 static const char hsz128_32[] = { SIZE_O, SIZE_U, 0};
195 static const char hsz256_32[] = { SIZE_Y, 0};
196
197 static const char hsz8_64[] = { SIZE_B, 0 };
198 static const char hsz16_64[] = { SIZE_W, 0 };
199 static const char hsz32_64[] = { SIZE_D, SIZE_Z, 0 };
200 static const char hsz48_64[] = { 0 };
201 static const char hsz64_64[] = { SIZE_Q, SIZE_U, SIZE_V, SIZE_VV, SIZE_R, SIZE_Z, 0};
202 static const char hsz128_64[] = { SIZE_O, SIZE_U, 0};
203 static const char hsz256_64[] = { SIZE_Y, 0};
204
/* byte sizes 1, 2, 4, 8 (NOTE(review): index meaning not visible in this section) */
static const int reg2size[4] = {1, 2, 4, 8};
/*
 * Indexed with (address size + 1) by encode_modrm(), so slot 0 is the
 * "unknown address size" case and yields -1.
 */
static const int addr2size[4] = {-1, 2, 4, 8};
207
208 /*
209 * CLASS x86asm
210 */
211
x86asm(X86OpSize o,X86AddrSize a)212 x86asm::x86asm(X86OpSize o, X86AddrSize a)
213 : Assembler(false)
214 {
215 opsize = o;
216 addrsize = a;
217 if (a != X86_ADDRSIZE64) {
218 prepInsns();
219 }
220 }
221
222 x86opc_insn (*x86asm::x86_32a_insns)[256];
223
prepInsns()224 void x86asm::prepInsns()
225 {
226 if (!x86_32a_insns) {
227 x86_32a_insns = ht_malloc(sizeof *x86_32a_insns);
228 memcpy(x86_32a_insns, x86_32_insns, sizeof x86_32_insns);
229
230 (*x86_32a_insns)[0xc4] = x86_les;
231 (*x86_32a_insns)[0xc5] = x86_lds;
232 }
233 x86_insns = x86_32a_insns;
234 }
235
alloc_insn()236 asm_insn *x86asm::alloc_insn()
237 {
238 return ht_malloc(sizeof (x86asm_insn));
239 }
240
createCompatibleDisassembler()241 x86dis *x86asm::createCompatibleDisassembler()
242 {
243 return new x86dis(opsize, addrsize);
244 }
245
delete_nonsense(CPU_ADDR addr)246 void x86asm::delete_nonsense(CPU_ADDR addr)
247 {
248 x86dis *dis = createCompatibleDisassembler();
249 restart:
250 asm_code *c=codes;
251 while (c) {
252 if (delete_nonsense_insn(c, dis, addr)) goto restart;
253 c = c->next;
254 }
255 delete dis;
256 }
257
skip_prefixes(byte ** p,int & sizep,int addrsize)258 static void skip_prefixes(byte **p, int &sizep, int addrsize)
259 {
260 while (sizep > 0) {
261 if (**p == 0x66 || **p == 0x67 || **p == 0xf2 || **p == 0xf3
262 || (addrsize == X86_ADDRSIZE64 && (**p & 0xf0) == 0x40)) {
263 sizep--; (*p)++;
264 } else {
265 break;
266 }
267 }
268 }
269
cmp_insn_normal(byte * p,int sizep,byte * q,int sizeq,int addrsize,x86dis * dis,CPU_ADDR addr)270 static bool cmp_insn_normal(byte *p, int sizep, byte *q, int sizeq, int addrsize, x86dis *dis, CPU_ADDR addr)
271 {
272 // UGLY: compare disassembly
273 char s[200];
274 dis_insn *d = dis->decode(p, sizep, addr);
275 ht_strlcpy(s, dis->str(d, X86DIS_STYLE_EXPLICIT_MEMSIZE), sizeof s);
276 d = dis->decode(q, sizeq, addr);
277 if (strcmp(s, dis->str(d, X86DIS_STYLE_EXPLICIT_MEMSIZE))) return false;
278 // different disassembly --> not the same
279
280 // compare opcodes (w/o prefixes)
281 skip_prefixes(&p, sizep, addrsize);
282 skip_prefixes(&q, sizeq, addrsize);
283 if (sizep != sizeq) return false;
284 // -> different raw opcodes --> not the same
285 return memcmp(p, q, sizep) == 0;
286 }
287
delete_nonsense_insn(asm_code * code,x86dis * dis,CPU_ADDR addr)288 bool x86asm::delete_nonsense_insn(asm_code *code, x86dis *dis, CPU_ADDR addr)
289 {
290 asm_code *c = codes;
291 while (c) {
292 if (c != code && code->size <= c->size) {
293 if (cmp_insn_normal(c->data, c->size, code->data, code->size, addrsize, dis, addr)) {
294 deletecode(c);
295 return true;
296 }
297 }
298 c = c->next;
299 }
300 return false;
301 }
302
emitdisp(uint64 d,int size)303 void x86asm::emitdisp(uint64 d, int size)
304 {
305 dispsize = size;
306 disp = d;
307 }
308
emitimm(uint64 i,int size)309 void x86asm::emitimm(uint64 i, int size)
310 {
311 immsize = size;
312 imm = i;
313 }
314
emitfarptr(uint32 s,uint32 o,bool big)315 void x86asm::emitfarptr(uint32 s, uint32 o, bool big)
316 {
317 if (big) {
318 immsize = 6;
319 imm = o;
320 imm2 = s;
321 } else {
322 immsize = 4;
323 imm = (s<<16) | (o & 0xffff);
324 }
325 }
326
emitmodrm(int modrm)327 void x86asm::emitmodrm(int modrm)
328 {
329 modrmv = modrm;
330 }
331
emitmodrm_mod(int mod)332 void x86asm::emitmodrm_mod(int mod)
333 {
334 if (modrmv == -1) modrmv = 0;
335 modrmv = (modrmv & ~(3<<6)) | ((mod & 3)<<6);
336 }
337
emitmodrm_reg(int reg)338 void x86asm::emitmodrm_reg(int reg)
339 {
340 if (modrmv == -1) modrmv = 0;
341 modrmv = (modrmv & ~(7<<3)) | ((reg & 7)<<3);
342 }
343
emitmodrm_rm(int rm)344 void x86asm::emitmodrm_rm(int rm)
345 {
346 if (modrmv == -1) modrmv = 0;
347 modrmv = (modrmv & ~7) | (rm & 7);
348 }
349
emitsib_base(int base)350 void x86asm::emitsib_base(int base)
351 {
352 if (sibv == -1) sibv = 0;
353 sibv = (sibv & ~7) | (base & 7);
354 }
355
emitsib_index(int index)356 void x86asm::emitsib_index(int index)
357 {
358 if (sibv == -1) sibv = 0;
359 sibv = (sibv & ~(7<<3)) | ((index & 7)<<3);
360 }
361
emitsib_scale(int scale)362 void x86asm::emitsib_scale(int scale)
363 {
364 if (sibv == -1) sibv = 0;
365 sibv = (sibv & ~(3<<6)) | ((scale & 3)<<6);
366 }
367
/*
 * Name-match modes.  encode() passes MATCHOPNAME_MATCH,
 * MATCHOPNAME_MATCH_IF_OPPREFIX and MATCHOPNAME_MATCH_IF_NOOPPREFIX to
 * match_opcodes().
 * NOTE(review): the remaining values are presumably produced by the
 * opcode-name matcher, which is outside this section.
 */
#define MATCHOPNAME_NOMATCH		0
#define MATCHOPNAME_MATCH		1
#define MATCHOPNAME_MATCH_IF_OPSIZE16	2
#define MATCHOPNAME_MATCH_IF_OPSIZE32	3
#define MATCHOPNAME_MATCH_IF_OPSIZE64	4
#define MATCHOPNAME_MATCH_IF_ADDRSIZE16	5
#define MATCHOPNAME_MATCH_IF_ADDRSIZE32	6
#define MATCHOPNAME_MATCH_IF_ADDRSIZE64	7
#define MATCHOPNAME_MATCH_IF_OPPREFIX	8
#define MATCHOPNAME_MATCH_IF_NOOPPREFIX	9
378
encode(asm_insn * asm_insn,int options,CPU_ADDR cur_address)379 asm_code *x86asm::encode(asm_insn *asm_insn, int options, CPU_ADDR cur_address)
380 {
381 Assembler::encode(asm_insn, options, cur_address);
382 x86asm_insn *insn = (x86asm_insn*)asm_insn;
383
384 newcode();
385 namefound = false;
386 if (addrsize == X86_ADDRSIZE64) {
387 address = cur_address.flat64.addr;
388 } else {
389 address = cur_address.addr32.offset;
390 }
391 esizes[0] = 0;
392 esizes[1] = 0;
393 esizes[2] = 0;
394 esizes[3] = 0;
395 esizes[4] = 0;
396 ambiguous = false;
397 match_opcodes(*x86_insns, insn, X86ASM_PREFIX_NO, MATCHOPNAME_MATCH);
398 if (!namefound && insn->repprefix != X86_PREFIX_NO) {
399 set_error_msg(X86ASM_ERRMSG_INVALID_PREFIX);
400 } else {
401 match_fopcodes(insn);
402 match_opcodes(x86_insns_ext, insn, X86ASM_PREFIX_0F, MATCHOPNAME_MATCH_IF_NOOPPREFIX);
403 match_opcodes(x86_insns_ext_66, insn, X86ASM_PREFIX_0F, MATCHOPNAME_MATCH_IF_OPPREFIX);
404 match_opcodes(x86_insns_ext_f2, insn, X86ASM_PREFIX_F20F, MATCHOPNAME_MATCH_IF_NOOPPREFIX);
405 match_opcodes(x86_insns_ext_f3, insn, X86ASM_PREFIX_F30F, MATCHOPNAME_MATCH_IF_NOOPPREFIX);
406 match_opcodes(x86_opc_group_insns[0], insn, X86ASM_PREFIX_0F38, MATCHOPNAME_MATCH_IF_NOOPPREFIX);
407 match_opcodes(x86_opc_group_insns[1], insn, X86ASM_PREFIX_660F38, MATCHOPNAME_MATCH_IF_OPPREFIX);
408 match_opcodes(x86_opc_group_insns[2], insn, X86ASM_PREFIX_F20F38, MATCHOPNAME_MATCH_IF_NOOPPREFIX);
409 match_opcodes(x86_opc_group_insns[3], insn, X86ASM_PREFIX_0F3A, MATCHOPNAME_MATCH_IF_NOOPPREFIX);
410 match_opcodes(x86_opc_group_insns[4], insn, X86ASM_PREFIX_660F3A, MATCHOPNAME_MATCH_IF_OPPREFIX);
411 match_opcodes(x86_opc_group_insns[5], insn, X86ASM_PREFIX_0F7A, MATCHOPNAME_MATCH_IF_NOOPPREFIX);
412 match_opcodes(x86_opc_group_insns[6], insn, X86ASM_PREFIX_0F7B, MATCHOPNAME_MATCH_IF_NOOPPREFIX);
413 match_opcodes(x86_opc_group_insns[7], insn, X86ASM_PREFIX_0F24, MATCHOPNAME_MATCH_IF_NOOPPREFIX);
414 match_opcodes(x86_opc_group_insns[8], insn, X86ASM_PREFIX_0F25, MATCHOPNAME_MATCH_IF_NOOPPREFIX);
415 match_vex_opcodes(insn);
416 }
417 if (error) {
418 free_asm_codes();
419 } else if (!codes) {
420 if (namefound) {
421 set_error_msg(X86ASM_ERRMSG_INVALID_OPERANDS);
422 } else {
423 set_error_msg(X86ASM_ERRMSG_UNKNOWN_COMMAND, insn->name);
424 }
425 } else {
426 delete_nonsense(cur_address);
427 }
428 return codes;
429 }
430
encode_insn(x86asm_insn * insn,x86opc_insn * opcode,int opcodeb,int additional_opcode,int prefix,int eopsize,int eaddrsize)431 bool x86asm::encode_insn(x86asm_insn *insn, x86opc_insn *opcode, int opcodeb, int additional_opcode, int prefix, int eopsize, int eaddrsize)
432 {
433 rexprefix = 0;
434 disppos = 0;
435
436 bool opsize_depend = false;
437 for (int i = 0; i < 4; i++) {
438 switch (x86_op_type[opcode->op[i]].size) {
439 case SIZE_BV:
440 case SIZE_V:
441 case SIZE_VV:
442 case SIZE_P:
443 opsize_depend = true;
444 break;
445 }
446 }
447
448 code.context = (void*)opsize_depend;
449
450 /* test rex thingies */
451 for (int i=0; i < 4; i++) {
452 if (insn->op[i].need_rex) {
453 rexprefix |= 0x40;
454 }
455 if (insn->op[i].forbid_rex) {
456 rexprefix |= 0x80;
457 }
458 }
459
460 modrmv = -1;
461 sibv = -1;
462 drexdest = -1;
463 drexoc0 = -1;
464 dispsize = 0;
465 immsize = 0;
466 if (additional_opcode != -1) {
467 if (additional_opcode & 0x800) {
468 emitmodrm_mod(3);
469 emitmodrm_reg(additional_opcode & 0x7);
470 emitmodrm_rm((additional_opcode >> 3) & 0x7);
471 } else {
472 emitmodrm_reg(additional_opcode);
473 }
474 }
475
476 if (addrsize == X86_ADDRSIZE64) {
477 if (eopsize == X86_ADDRSIZE64) {
478 if (insn->opsizeprefix == X86_PREFIX_OPSIZE) emitbyte(0x66);
479 if (!(x86_op_type[opcode->op[0]].info & INFO_DEFAULT_64)) {
480 // instruction doesn't default to 64 bit opsize
481 rexprefix |= rexw;
482 }
483 } else if (eopsize == X86_ADDRSIZE32) {
484 if (x86_op_type[opcode->op[0]].info & INFO_DEFAULT_64) {
485 // instruction defaults to 64 bit opsize
486 // it's not possible to switch to 32 bit
487 return false;
488 }
489 if (insn->opsizeprefix == X86_PREFIX_OPSIZE) emitbyte(0x66);
490 } else if (eopsize == X86_ADDRSIZE16) {
491 if (insn->opsizeprefix == X86_PREFIX_NOOPSIZE) return false;
492 emitbyte(0x66);
493 }
494 if (eaddrsize == X86_ADDRSIZE16) return false;
495 if (eaddrsize == X86_ADDRSIZE32) emitbyte(0x67);
496 } else {
497 if (eopsize != opsize || insn->opsizeprefix == X86_PREFIX_OPSIZE) {
498 if (insn->opsizeprefix == X86_PREFIX_NOOPSIZE) return false;
499 emitbyte(0x66);
500 }
501 if (eaddrsize != addrsize) emitbyte(0x67);
502 }
503
504 if ((rexprefix & 0xc0) == 0xc0) {
505 // can't combine insns which simultaneously need REX and forbid REX
506 clearcode();
507 return false;
508 }
509
510 /* write lock, rep and/or seg prefixes if needed */
511 switch (insn->lockprefix) {
512 case X86_PREFIX_LOCK: emitbyte(0xf0); break;
513 }
514 switch (insn->repprefix) {
515 case X86_PREFIX_REPNZ: emitbyte(0xf2); break;
516 case X86_PREFIX_REPZ: emitbyte(0xf3); break;
517 }
518 switch (insn->segprefix) {
519 case X86_PREFIX_ES: emitbyte(0x26); break;
520 case X86_PREFIX_CS: emitbyte(0x2e); break;
521 case X86_PREFIX_SS: emitbyte(0x36); break;
522 case X86_PREFIX_DS: emitbyte(0x3e); break;
523 case X86_PREFIX_FS: emitbyte(0x64); break;
524 case X86_PREFIX_GS: emitbyte(0x65); break;
525 }
526
527 switch (prefix) {
528 case X86ASM_PREFIX_F20F:
529 case X86ASM_PREFIX_F20F38:
530 emitbyte(0xf2);
531 break;
532 case X86ASM_PREFIX_F30F:
533 emitbyte(0xf3);
534 break;
535 }
536
537 int rexpos = code.size;
538 if ((rexprefix & 0x40)
539 && prefix != X86ASM_PREFIX_0F24
540 && prefix != X86ASM_PREFIX_0F25) {
541 emitbyte(0xff); // dummy value
542 }
543
544 /* write opcodeprefixes and opcode */
545 switch (prefix) {
546 case X86ASM_PREFIX_0F0F:
547 emitbyte(0x0f);
548 case X86ASM_PREFIX_F20F:
549 case X86ASM_PREFIX_F30F:
550 case X86ASM_PREFIX_0F:
551 emitbyte(0x0f);
552 case X86ASM_PREFIX_NO:
553 break;
554 case X86ASM_PREFIX_0F38:
555 case X86ASM_PREFIX_660F38:
556 case X86ASM_PREFIX_F20F38:
557 emitbyte(0x0f);
558 emitbyte(0x38);
559 break;
560 case X86ASM_PREFIX_0F3A:
561 case X86ASM_PREFIX_660F3A:
562 emitbyte(0x0f);
563 emitbyte(0x3a);
564 break;
565 case X86ASM_PREFIX_0F24:
566 emitbyte(0x0f);
567 emitbyte(0x24);
568 break;
569 case X86ASM_PREFIX_0F25:
570 emitbyte(0x0f);
571 emitbyte(0x25);
572 break;
573 case X86ASM_PREFIX_0F7A:
574 emitbyte(0x0f);
575 emitbyte(0x7a);
576 break;
577 case X86ASM_PREFIX_0F7B:
578 emitbyte(0x0f);
579 emitbyte(0x7b);
580 break;
581 case X86ASM_PREFIX_DF:
582 case X86ASM_PREFIX_DE:
583 case X86ASM_PREFIX_DD:
584 case X86ASM_PREFIX_DC:
585 case X86ASM_PREFIX_DB:
586 case X86ASM_PREFIX_DA:
587 case X86ASM_PREFIX_D9:
588 case X86ASM_PREFIX_D8:
589 opcodeb = 0xd8 + prefix - X86ASM_PREFIX_D8;
590 break;
591 }
592 emitbyte(opcodeb);
593
594 /* encode the ops */
595 for (int i=0; i < 4; i++) {
596 if (!encode_op(&insn->op[i], &x86_op_type[opcode->op[i]], &esizes[i], eopsize, eaddrsize)) {
597 clearcode();
598 return false;
599 }
600 }
601
602 /* write the rest */
603 if (modrmv != -1) emitbyte(modrmv);
604 if (sibv != -1) emitbyte(sibv);
605
606 if (drexdest != -1) {
607 byte oc0 = 0;
608 if (drexoc0 != -1) {
609 oc0 = drexoc0;
610 }
611 byte drex = (drexdest << 4) | (oc0 << 3) | (rexprefix & 7);
612 rexprefix = 0;
613 emitbyte(drex);
614 }
615
616 if (disppos && addrsize == X86_ADDRSIZE64) {
617 // fix ip-relative disp in PM64 mode
618 dispsize = 4;
619 disp -= address + code.size + dispsize + immsize;
620 if (simmsize(disp, 4) > 4) {
621 clearcode();
622 return false;
623 }
624 }
625 switch (dispsize) {
626 case 1:
627 emitbyte(disp);
628 break;
629 case 2:
630 emitword(disp);
631 break;
632 case 4:
633 emitdword(disp);
634 break;
635 case 8:
636 emitqword(disp);
637 break;
638 }
639 switch (immsize) {
640 case 1:
641 emitbyte(imm);
642 break;
643 case 2:
644 emitword(imm);
645 break;
646 case 4:
647 emitdword(imm);
648 break;
649 case 6:
650 emitdword(imm);
651 emitword(imm2);
652 break;
653 case 8:
654 emitqword(imm);
655 break;
656 }
657
658 // fix rex code
659 if (rexprefix & 0x40) {
660 code.data[rexpos] = rexprefix;
661 }
662
663 return true;
664 }
665
encode_vex_insn(x86asm_insn * insn,x86opc_vex_insn * opcode,int opcodeb,int additional_opcode,int eopsize,int eaddrsize)666 bool x86asm::encode_vex_insn(x86asm_insn *insn, x86opc_vex_insn *opcode, int opcodeb, int additional_opcode, int eopsize, int eaddrsize)
667 {
668 rexprefix = 0;
669 disppos = 0;
670 immsize = 0;
671 dispsize = 0;
672 vexvvvv = 0;
673 modrmv = -1;
674 sibv = -1;
675
676 if (additional_opcode != -1) {
677 emitmodrm_reg(additional_opcode);
678 }
679
680 switch (insn->lockprefix) {
681 case X86_PREFIX_LOCK:
682 clearcode();
683 return false;
684 }
685 switch (insn->repprefix) {
686 case X86_PREFIX_REPNZ:
687 case X86_PREFIX_REPZ:
688 clearcode();
689 return false;
690 }
691
692 if (addrsize == X86_ADDRSIZE64) {
693 if (eaddrsize == X86_ADDRSIZE16) return false;
694 if (eaddrsize == X86_ADDRSIZE32) emitbyte(0x67);
695 } else {
696 if (eaddrsize != addrsize) emitbyte(0x67);
697 }
698
699
700 switch (insn->segprefix) {
701 case X86_PREFIX_ES: emitbyte(0x26); break;
702 case X86_PREFIX_CS: emitbyte(0x2e); break;
703 case X86_PREFIX_SS: emitbyte(0x36); break;
704 case X86_PREFIX_DS: emitbyte(0x3e); break;
705 case X86_PREFIX_FS: emitbyte(0x64); break;
706 case X86_PREFIX_GS: emitbyte(0x65); break;
707 }
708
709 for (int i=0; i < 5; i++) {
710 if (!encode_op(&insn->op[i], &x86_op_type[opcode->op[i]], &esizes[i], eopsize, eaddrsize)) {
711 clearcode();
712 return false;
713 }
714 }
715
716 if ((opcode->vex & 0x3c) == 0x4 // 0x0f-opcode
717 && !(rexprefix & 3) // no rexb/rexx
718 && !(opcode->vex & W1)) {
719 // use short vex prefix
720 emitbyte(0xc5);
721 byte vex = (~rexprefix & 4) << 5
722 | ((~vexvvvv & 0xf) << 3)
723 | (opcode->vex & _256) >> 4
724 | (opcode->vex & 0x3);
725 emitbyte(vex);
726 } else {
727 // use long vex/xop prefix
728 if (opcode->vex & _0f24) {
729 emitbyte(0x8f);
730 } else {
731 emitbyte(0xc4);
732 }
733 byte vex = (~rexprefix & 7) << 5 | ((opcode->vex & 0x3c) >> 2);
734 emitbyte(vex);
735 vex = (opcode->vex & W1) | ((~vexvvvv & 0xf) << 3)
736 | (opcode->vex & _256) >> 4
737 | (opcode->vex & 0x3);
738 emitbyte(vex);
739 }
740
741 emitbyte(opcodeb);
742
743 if (modrmv != -1) emitbyte(modrmv);
744 if (sibv != -1) emitbyte(sibv);
745
746 if (disppos && addrsize == X86_ADDRSIZE64) {
747 // fix ip-relative disp in PM64 mode
748 dispsize = 4;
749 disp -= address + code.size + dispsize + immsize;
750 if (simmsize(disp, 4) > 4) {
751 clearcode();
752 return false;
753 }
754 }
755 switch (dispsize) {
756 case 1:
757 emitbyte(disp);
758 break;
759 case 2:
760 emitword(disp);
761 break;
762 case 4:
763 emitdword(disp);
764 break;
765 }
766
767 switch (immsize) {
768 case 1:
769 emitbyte(imm);
770 break;
771 }
772
773 return true;
774 }
775
encode_modrm(x86_insn_op * op,char size,bool allow_reg,bool allow_mem,int eopsize,int eaddrsize)776 bool x86asm::encode_modrm(x86_insn_op *op, char size, bool allow_reg, bool allow_mem, int eopsize, int eaddrsize)
777 {
778 switch (op->type) {
779 case X86_OPTYPE_REG:
780 if (!allow_reg) return false;
781 emitmodrm_mod(3);
782 emitmodrm_rm(op->reg);
783 if (op->reg > 7) rexprefix |= rexb;
784 break;
785 case X86_OPTYPE_MEM: {
786 if (!allow_mem) return false;
787 int addrsize = op->mem.addrsize;
788 int mindispsize = addr2size[addrsize+1];
789
790 if (addrsize == X86_ADDRSIZEUNKNOWN) {
791 addrsize = eaddrsize;
792 if (this->addrsize == X86_ADDRSIZE64) {
793 // ip-relative, we check this later
794 mindispsize = 4;
795 } else {
796 mindispsize = op->mem.disp ? simmsize(op->mem.disp, 8) : 0;
797 }
798 } else {
799 mindispsize = op->mem.disp ? simmsize(op->mem.disp, mindispsize) : 0;
800 if (mindispsize > 4) return false;
801 }
802 if (addrsize == X86_ADDRSIZE16) {
803 int mod, rm, dispsize;
804 if (!encode_modrm_v(&modrm16, op, mindispsize, &mod, &rm, &dispsize)) return 0;
805 emitmodrm_mod(mod);
806 emitmodrm_rm(rm);
807 emitdisp(op->mem.disp, dispsize);
808 } else {
809 int mod, rm, dispsize;
810 if (!encode_modrm_v(&modrm32, op, mindispsize, &mod, &rm, &dispsize)) {
811 int scale, index, base, disp=op->mem.disp;
812 if (encode_sib_v(op, mindispsize, &scale, &index, &base, &mod, &dispsize, &disp)) {
813 emitmodrm_mod(mod);
814 emitmodrm_rm(4); /* SIB */
815 emitsib_scale(scale);
816 emitsib_index(index);
817 emitsib_base(base);
818 emitdisp(disp, dispsize);
819 } else return false;
820 } else {
821 emitmodrm_mod(mod);
822 emitmodrm_rm(rm);
823 emitdisp(op->mem.disp, dispsize);
824 }
825 }
826 break;
827 }
828 case X86_OPTYPE_MMX:
829 if (!allow_reg) return false;
830 emitmodrm_mod(3);
831 emitmodrm_rm(op->mmx);
832 break;
833 case X86_OPTYPE_XMM:
834 if (!allow_reg) return false;
835 emitmodrm_mod(3);
836 emitmodrm_rm(op->xmm);
837 if (op->xmm > 7) rexprefix |= rexb;
838 break;
839 case X86_OPTYPE_YMM:
840 if (!allow_reg) return false;
841 emitmodrm_mod(3);
842 emitmodrm_rm(op->ymm);
843 if (op->ymm > 7) rexprefix |= rexb;
844 break;
845 default:
846 return false;
847 }
848 return true;
849 }
850
encode_modrm_v(const x86addrcoding (* modrmc)[3][8],x86_insn_op * op,int mindispsize,int * _mod,int * _rm,int * _dispsize)851 bool x86asm::encode_modrm_v(const x86addrcoding (*modrmc)[3][8], x86_insn_op *op, int mindispsize, int *_mod, int *_rm, int *_dispsize)
852 {
853 if (op->mem.scale > 1) return false;
854 for (int mod=0; mod < 3; mod++) {
855 for (int rm=0; rm < 8; rm++) {
856 const x86addrcoding *c = &(*modrmc)[mod][rm];
857 int r1 = c->reg1;
858 int r2 = c->reg2;
859 if (r2 == (op->mem.base & ~8)) {
860 int t = r1;
861 r1 = r2;
862 r2 = t;
863 }
864 if (r1==(op->mem.base&~8) && r2==(op->mem.index&~8) && c->dispsize>=mindispsize) {
865 *_mod=mod;
866 *_rm=rm;
867 *_dispsize=c->dispsize;
868 if (this->addrsize == X86_ADDRSIZE64
869 && mod == 0 && rm == 5) {
870 // ip-relative addressing
871 disppos = 1;
872 }
873 if (op->mem.base & 8) rexprefix |= rexb;
874 return true;
875 }
876 }
877 }
878 return false;
879 }
880
encode_op(x86_insn_op * op,x86opc_insn_op * xop,int * esize,int eopsize,int eaddrsize)881 bool x86asm::encode_op(x86_insn_op *op, x86opc_insn_op *xop, int *esize, int eopsize, int eaddrsize)
882 {
883 int psize = op->size;
884 switch (xop->type) {
885 case TYPE_0:
886 return true;
887 case TYPE_A:
888 /* direct address without ModR/M */
889 if (op->type == X86_OPTYPE_FARPTR) {
890 int size = esizeop_ex(xop->size, eopsize);
891 emitfarptr(op->farptr.seg, op->farptr.offset, size == 6);
892 } else {
893 emitimm(op->imm, op->size);
894 }
895 break;
896 case TYPE_C:
897 /* reg of ModR/M picks control register */
898 emitmodrm_reg(op->crx);
899 if (op->crx > 7) rexprefix |= rexr;
900 break;
901 case TYPE_D:
902 /* reg of ModR/M picks debug register */
903 emitmodrm_reg(op->drx);
904 if (op->drx > 7) rexprefix |= rexr;
905 break;
906 case TYPE_E:
907 /* ModR/M (general reg or memory) */
908 if (!encode_modrm(op, xop->size, true, true, eopsize, eaddrsize)) return false; //XXX
909 psize = esizeop(xop->size, eopsize); //XXX
910 break;
911 case TYPE_F:
912 /* r/m of ModR/M picks a fpu register */
913 emitmodrm_rm(op->stx);
914 break;
915 case TYPE_Fx:
916 /* extra picks a fpu register */
917 return true;
918 case TYPE_G:
919 /* reg of ModR/M picks general register */
920 emitmodrm_reg(op->reg);
921 if (op->reg > 7) rexprefix |= rexr;
922 break;
923 case TYPE_Is: {
924 /* signed immediate */
925 int size = esizeop_ex(xop->size, eopsize);
926 emitimm(op->imm, size);
927 break;
928 }
929 case TYPE_I: {
930 /* unsigned immediate */
931 int size = esizeop_ex(xop->size, eopsize);
932 emitimm(op->imm, size);
933 break;
934 }
935 case TYPE_Ix:
936 /* fixed immediate */
937 return true;
938 case TYPE_I4: {
939 /* 4 bit immediate (see TYPE_VI, TYPE_YI) */
940 if (op->imm > 15) return false;
941 if (immsize == 0) {
942 immsize = 1;
943 imm = 0;
944 }
945 imm |= op->imm;
946 break;
947 }
948 case TYPE_J: {
949 /* relative branch offset */
950 int size = esizeop_ex(xop->size, eopsize);
951 emitimm(uint32(op->imm - address - code.size - size), size);
952 break;
953 }
954 case TYPE_M:
955 /* ModR/M (memory only) */
956 if (!encode_modrm(op, xop->size, false, true, eopsize, eaddrsize)) return false; // XXX
957 psize = esizeop(xop->size, eopsize); //XXX
958 break;
959 case TYPE_MR: {
960 /* Same as E, but extra picks reg size */
961 byte xopsize = xop->size;
962 if (op->type == X86_OPTYPE_REG) {
963 xopsize = xop->extra;
964 }
965 if (!encode_modrm(op, xopsize, true, true, eopsize, eaddrsize)) return false; //XXX
966 psize = esizeop(xopsize, eopsize); //XXX
967 break;
968 }
969 case TYPE_O: {
970 /* direct memory without ModR/M */
971 if (op->mem.base != X86_REG_NO) return false;
972 if (op->mem.index != X86_REG_NO) return false;
973 psize = esizeop(xop->size, eopsize); // XXX
974 switch (eaddrsize) {
975 case X86_ADDRSIZE16:
976 if (op->mem.disp > 0xffff) return false;
977 emitdisp(op->mem.disp, 2);
978 break;
979 case X86_ADDRSIZE32:
980 if (op->mem.disp > 0xffffffff) return false;
981 emitdisp(op->mem.disp, 4);
982 break;
983 case X86_ADDRSIZE64:
984 emitdisp(op->mem.disp, 8);
985 break;
986 }
987 break;
988 }
989 case TYPE_P:
990 /* reg of ModR/M picks MMX register */
991 emitmodrm_reg(op->mmx);
992 break;
993 case TYPE_PR:
994 /* rm of ModR/M picks MMX register */
995 emitmodrm_mod(3);
996 emitmodrm_rm(op->mmx);
997 break;
998 case TYPE_Q:
999 /* ModR/M (MMX reg or memory) */
1000 if (!encode_modrm(op, xop->size, true, true, eopsize, eaddrsize)) return false; //XXX
1001 psize = esizeop(xop->size, eopsize); //XXX
1002 break;
1003 case TYPE_R:
1004 /* rm of ModR/M picks general register */
1005 emitmodrm_mod(3);
1006 emitmodrm_rm(op->reg);
1007 // fall throu
1008 case TYPE_Rx:
1009 if (op->reg > 7) rexprefix |= rexb;
1010 return true;
1011 case TYPE_RXx:
1012 /* extra picks register, no REX */
1013 return true;
1014 case TYPE_RV:
1015 /* VEX.vvvv picks general register */
1016 vexvvvv = op->reg;
1017 return true;
1018 case TYPE_S:
1019 /* reg of ModR/M picks segment register */
1020 emitmodrm_reg(op->seg);
1021 break;
1022 case TYPE_Sx:
1023 /* extra picks segment register */
1024 return true;
1025 case TYPE_V:
1026 /* reg of ModR/M picks XMM register */
1027 emitmodrm_reg(op->xmm);
1028 if (op->xmm > 7) rexprefix |= rexr;
1029 break;
1030 case TYPE_VR:
1031 /* rm of ModR/M picks XMM register */
1032 emitmodrm_mod(3);
1033 emitmodrm_rm(op->xmm);
1034 if (op->xmm > 7) rexprefix |= rexb;
1035 break;
1036 case TYPE_Vx:
1037 break;
1038 case TYPE_VV:
1039 vexvvvv = op->xmm;
1040 break;
1041 case TYPE_VI: {
1042 /* bits 7-4 of imm pick XMM register */
1043 if (immsize == 0) {
1044 immsize = 1;
1045 imm = 0;
1046 }
1047 imm |= op->xmm << 4;
1048 break;
1049 }
1050 case TYPE_VD:
1051 if (drexdest == -1) {
1052 drexdest = op->xmm;
1053 } else {
1054 if (drexdest != op->xmm) {
1055 return false;
1056 }
1057 }
1058 break;
1059 case TYPE_VS:
1060 if (op->type == X86_OPTYPE_XMM) {
1061 if (drexoc0 == 0) {
1062 emitmodrm_mod(3);
1063 emitmodrm_rm(op->xmm);
1064 if (op->xmm > 7) rexprefix |= rexb;
1065 } else {
1066 emitmodrm_reg(op->xmm);
1067 if (op->xmm > 7) rexprefix |= rexr;
1068 if (drexoc0 == -1) drexoc0 = 0;
1069 }
1070 } else {
1071 if (drexoc0 == 1) return false;
1072 if (drexoc0 == -1) {
1073 if (xop->info) return false;
1074 drexoc0 = 1;
1075 }
1076 if (!encode_modrm(op, xop->size, true, true, eopsize, eaddrsize)) return false; //XXX
1077 psize = esizeop(xop->size, eopsize); //XXX
1078 }
1079 break;
1080 case TYPE_W:
1081 case TYPE_X:
1082 /* ModR/M (XMM/YMM reg or memory) */
1083 if (!encode_modrm(op, xop->size, true, true, eopsize, eaddrsize)) return false; //XXX
1084 psize = esizeop(xop->size, eopsize); //XXX
1085 break;
1086 case TYPE_Y:
1087 /* reg of ModR/M picks YMM register */
1088 emitmodrm_reg(op->ymm);
1089 if (op->ymm > 7) rexprefix |= rexr;
1090 break;
1091 case TYPE_YI: {
1092 /* bits 7-4 of imm pick YMM register */
1093 if (immsize == 0) {
1094 immsize = 1;
1095 imm = 0;
1096 }
1097 imm |= op->xmm << 4;
1098 break;
1099 }
1100 case TYPE_YV:
1101 vexvvvv = op->ymm;
1102 break;
1103 case TYPE_YR:
1104 /* rm of ModR/M picks YMM register */
1105 emitmodrm_mod(3);
1106 emitmodrm_rm(op->ymm);
1107 if (op->ymm > 7) rexprefix |= rexb;
1108 break;
1109 }
1110 if (!psize) {
1111 // set_error_msg(X86ASM_ERRMSG_INTERNAL"FIXME: size ??? %s, %d\n", __FILE__, __LINE__);
1112 }
1113 if (!*esize) *esize = psize;
1114 /* if (!(options & X86ASM_ALLOW_AMBIGUOUS) && *esize != psize) {
1115 ambiguous = 1;
1116 set_error_msg(X86ASM_ERRMSG_AMBIGUOUS);
1117 return 0;
1118 }*/
1119 return true;
1120 }
1121
encode_sib_v(x86_insn_op * op,int mindispsize,int * _ss,int * _index,int * _base,int * _mod,int * _dispsize,int * disp)1122 bool x86asm::encode_sib_v(x86_insn_op *op, int mindispsize, int *_ss, int *_index, int *_base, int *_mod, int *_dispsize, int *disp)
1123 {
1124 int ss, scale=op->mem.scale, index=op->mem.index, base=op->mem.base, mod, dispsize;
1125 if (base == X86_REG_NO && index != X86_REG_NO) {
1126 switch (scale) {
1127 case 1: case 4: case 8:
1128 break;
1129 case 2: case 3: case 5: case 9:
1130 scale--;
1131 base = index;
1132 break;
1133 default:
1134 return false;
1135 }
1136 }
1137 if (index == X86_REG_SP) {
1138 if (scale > 1) return false;
1139 if (scale == 1) {
1140 if (base == X86_REG_SP) return false;
1141 int temp = index;
1142 index = base;
1143 base = temp;
1144 }
1145 }
1146 if (index != X86_REG_NO) {
1147 switch (scale) {
1148 case 1: ss = 0; break;
1149 case 2: ss = 1; break;
1150 case 4: ss = 2; break;
1151 case 8: ss = 3; break;
1152 default: return 0;
1153 }
1154 } else {
1155 ss = 0;
1156 index = 4;
1157 }
1158 switch (mindispsize) {
1159 case 0:
1160 mod = 0;
1161 dispsize = 0;
1162 break;
1163 case 1:
1164 mod = 1;
1165 dispsize = 1;
1166 break;
1167 case 2:
1168 case 4:
1169 case 8:
1170 mod = 2;
1171 dispsize = 4;
1172 break;
1173 default:
1174 return false;
1175 }
1176 if (base == X86_REG_NO) {
1177 base = 5;
1178 mod = 0;
1179 dispsize = 4;
1180 if (!mindispsize) *disp = 0;
1181 } else {
1182 if ((base & 7) == X86_REG_BP && mod == 0) {
1183 mod = 1;
1184 dispsize = 1;
1185 if (!mindispsize) *disp = 0;
1186 }
1187 }
1188 *_mod = mod;
1189 *_ss = ss;
1190 *_index = index;
1191 *_base = base;
1192 if (index & 8) rexprefix |= rexx;
1193 if (base & 8) rexprefix |= rexb;
1194 *_dispsize = dispsize;
1195 return 1;
1196 }
1197
esizeop(uint c,int size)1198 int x86asm::esizeop(uint c, int size)
1199 {
1200 switch (c) {
1201 case SIZE_B:
1202 return 1;
1203 case SIZE_W:
1204 return 2;
1205 case SIZE_D:
1206 case SIZE_S:
1207 return 4;
1208 case SIZE_Q:
1209 case SIZE_L:
1210 return 8;
1211 case SIZE_O:
1212 return 16;
1213 case SIZE_T:
1214 return 10;
1215 case SIZE_V:
1216 case SIZE_BV:
1217 case SIZE_VV:
1218 switch (size) {
1219 case X86_OPSIZE16: return 2;
1220 case X86_OPSIZE32: return 4;
1221 case X86_OPSIZE64: return 8;
1222 }
1223 /* case SIZE_R:
1224 if (rexw(insn.rexprefix)) return 8; else return 4;
1225 case SIZE_U:
1226 if (insn.opsizeprefix == X86_PREFIX_OPSIZE) return 16; else return 8;
1227 case SIZE_Z:
1228 if (insn.opsizeprefix == X86_PREFIX_OPSIZE) return 8; else return 4;
1229 */
1230 case SIZE_P:
1231 if (size == X86_OPSIZE16) return 4; else return 6;
1232 }
1233 return 0;
1234 }
1235
esizeop_ex(uint c,int size)1236 int x86asm::esizeop_ex(uint c, int size)
1237 {
1238 switch (c) {
1239 case SIZE_BV:
1240 return 1;
1241 case SIZE_VV:
1242 switch (size) {
1243 case X86_OPSIZE16: return 2;
1244 case X86_OPSIZE32:
1245 case X86_OPSIZE64: return 4;
1246 }
1247 }
1248 return esizeop(c, size);
1249 }
1250
1251
flsz2hsz(int size)1252 char x86asm::flsz2hsz(int size)
1253 {
1254 switch (size) {
1255 case 4:
1256 return SIZE_S;
1257 case 8:
1258 return SIZE_L;
1259 case 10:
1260 return SIZE_T;
1261 }
1262 return 0;
1263 }
1264
get_name()1265 const char *x86asm::get_name()
1266 {
1267 return "x86asm";
1268 }
1269
immlsz2hsz(int size,int opsize)1270 const char *x86asm::immlsz2hsz(int size, int opsize)
1271 {
1272 if (opsize == X86_OPSIZE16) {
1273 switch (size) {
1274 case 1:
1275 return immhsz8_16;
1276 case 2:
1277 return immhsz16_16;
1278 case 4:
1279 return immhsz32_16;
1280 case 8:
1281 return immhsz64_16;
1282 }
1283 } else if (opsize == X86_OPSIZE32) {
1284 switch (size) {
1285 case 1:
1286 return immhsz8_32;
1287 case 2:
1288 return immhsz16_32;
1289 case 4:
1290 return immhsz32_32;
1291 case 8:
1292 return immhsz64_32;
1293 }
1294 } else {
1295 switch (size) {
1296 case 1:
1297 return immhsz8_64;
1298 case 2:
1299 return immhsz16_64;
1300 case 4:
1301 return immhsz32_64;
1302 case 8:
1303 return immhsz64_64;
1304 }
1305 }
1306 return 0;
1307 }
1308
lsz2hsz(int size,int opsize)1309 const char *x86asm::lsz2hsz(int size, int opsize)
1310 {
1311 if (opsize == X86_OPSIZE16) {
1312 switch (size) {
1313 case 1:
1314 return hsz8_16;
1315 case 2:
1316 return hsz16_16;
1317 case 4:
1318 return hsz32_16;
1319 case 6:
1320 return hsz48_16;
1321 case 8:
1322 return hsz64_16;
1323 case 16:
1324 return hsz128_16;
1325 case 32:
1326 return hsz256_16;
1327 }
1328 } else if (opsize == X86_OPSIZE32) {
1329 switch (size) {
1330 case 1:
1331 return hsz8_32;
1332 case 2:
1333 return hsz16_32;
1334 case 4:
1335 return hsz32_32;
1336 case 6:
1337 return hsz48_32;
1338 case 8:
1339 return hsz64_32;
1340 case 16:
1341 return hsz128_32;
1342 case 32:
1343 return hsz256_32;
1344 }
1345 } else {
1346 switch (size) {
1347 case 1:
1348 return hsz8_64;
1349 case 2:
1350 return hsz16_64;
1351 case 4:
1352 return hsz32_64;
1353 case 6:
1354 return hsz48_64;
1355 case 8:
1356 return hsz64_64;
1357 case 16:
1358 return hsz128_64;
1359 case 32:
1360 return hsz256_64;
1361 }
1362 }
1363 return 0;
1364 }
1365
/*
 *	Result codes of match_type() / match_allops().
 *	match_allops() relies on MATCHTYPE_NOMATCH being 0 and on the
 *	numeric order of the other codes (it keeps the largest one).
 */
#define MATCHTYPE_NOMATCH	0	/* operand does not fit the slot */
#define MATCHTYPE_MATCH		1	/* operand fits, no prefix constraint */
#define MATCHTYPE_NOOPPREFIX	2	/* MMX register in a P/PR/Q slot of size U/Z */
#define MATCHTYPE_OPPREFIX	3	/* XMM register in a P/PR/Q slot of size U/Z */
1370
match_type(x86_insn_op * op,x86opc_insn_op * xop,int addrsize)1371 int x86asm::match_type(x86_insn_op *op, x86opc_insn_op *xop, int addrsize)
1372 {
1373 if (op->type == X86_OPTYPE_EMPTY && xop->type == TYPE_0) return MATCHTYPE_MATCH;
1374 int r = MATCHTYPE_MATCH;
1375 if (op->type == X86_OPTYPE_MMX) {
1376 if ((xop->type == TYPE_P || xop->type == TYPE_PR || xop->type == TYPE_Q)
1377 && (xop->size == SIZE_U || xop->size == SIZE_Z)) {
1378 r = MATCHTYPE_NOOPPREFIX;
1379 }
1380 }
1381 const byte *hop = lop2hop[op->type];
1382 while (*hop) {
1383 if (*hop == xop->type) {
1384 if (xop->type == TYPE_Rx) {
1385 if (xop->extra == (op->reg & 7)) return r;
1386 } else if (xop->type == TYPE_RXx) {
1387 if (xop->extra == op->reg) return r;
1388 } else if (xop->type == TYPE_Sx) {
1389 if (xop->extra == op->seg) return r;
1390 } else if (xop->type == TYPE_Ix) {
1391 if ((unsigned)xop->extra == op->imm) return r;
1392 } else if (xop->type == TYPE_Fx) {
1393 if (xop->extra == op->stx) return r;
1394 } else if (xop->type == TYPE_Vx) {
1395 if (xop->extra == op->xmm) return r;
1396 } else if (op->type == X86_OPTYPE_MEM) {
1397 if (op->mem.addrsize == addrsize
1398 || op->mem.addrsize == X86_ADDRSIZEUNKNOWN) return r;
1399 } else return r;
1400 }
1401 hop++;
1402 }
1403 // special xmm match of mmx operands
1404 if (op->type == X86_OPTYPE_XMM) {
1405 if ((xop->type == TYPE_P || xop->type == TYPE_PR || xop->type == TYPE_Q)
1406 && (xop->size == SIZE_U || xop->size == SIZE_Z)) {
1407 return MATCHTYPE_OPPREFIX;
1408 }
1409 }
1410 return MATCHTYPE_NOMATCH;
1411 }
1412
match_size(x86_insn_op * op,x86opc_insn_op * xop,int opsize)1413 bool x86asm::match_size(x86_insn_op *op, x86opc_insn_op *xop, int opsize)
1414 {
1415 if (op->type == X86_OPTYPE_EMPTY && xop->type == TYPE_0) return true;
1416 if (!op->size && xop->type != TYPE_0) return true;
1417 const char *hsz = NULL;
1418
1419 byte xopsize = xop->size;
1420 if (op->type == X86_OPTYPE_REG && xop->type == TYPE_MR) {
1421 xopsize = xop->extra;
1422 }
1423
1424 if ((op->type == X86_OPTYPE_MEM && op->mem.floatptr)
1425 || op->type == X86_OPTYPE_STX) {
1426 return xop->size == flsz2hsz(op->size);
1427 } else if (op->type == X86_OPTYPE_IMM) {
1428 if (xop->type == TYPE_Is) {
1429 hsz = immlsz2hsz(simmsize(op->imm, esizeop(xop->size, opsize)), opsize); //XXX
1430 } else if (xop->type == TYPE_J) {
1431 int ssize = esizeop_ex(xop->size, opsize);
1432 int size = esizeop(xop->size, opsize);
1433 // FIXME: ?!
1434 hsz = immlsz2hsz(simmsize(op->imm - address - code.size - ssize, size), opsize);
1435 } else {
1436 hsz = immlsz2hsz(simmsize(op->imm, esizeop(xop->size, opsize)), opsize); //XXX
1437 }
1438 } else if (op->type == X86_OPTYPE_YMM
1439 || op->type == X86_OPTYPE_XMM
1440 || op->type == X86_OPTYPE_MMX) {
1441 return true;
1442 } else {
1443 hsz = lsz2hsz(op->size, opsize);
1444 }
1445
1446 if (hsz) {
1447 while (*hsz) {
1448 if (*hsz == xopsize) return true;
1449 hsz++;
1450 }
1451 }
1452 return false;
1453 }
1454
match_allops(x86asm_insn * insn,byte * xop,int maxop,int opsize,int addrsize)1455 int x86asm::match_allops(x86asm_insn *insn, byte *xop, int maxop, int opsize, int addrsize)
1456 {
1457 int m = 0;
1458 for (int i = 0; i < maxop; i++) {
1459 int m2 = match_type(&insn->op[i], &x86_op_type[xop[i]], addrsize);
1460 if (!m2 || (m && m != MATCHTYPE_MATCH && m2 != MATCHTYPE_MATCH && m != m2)) {
1461 return MATCHTYPE_NOMATCH;
1462 } else {
1463 if (m2 > m) m = m2;
1464 }
1465 if (!match_size(&insn->op[i], &x86_op_type[xop[i]], opsize)) return MATCHTYPE_NOMATCH;
1466 }
1467 return m;
1468 }
1469
pickname(char * result,const char * name,int n)1470 static void pickname(char *result, const char *name, int n)
1471 {
1472 const char *s = name;
1473 do {
1474 name = s+1;
1475 s = strchr(name, '|');
1476 if (!s) {
1477 strcpy(result, name);
1478 return;
1479 }
1480 } while (n--);
1481 ht_strlcpy(result, name, s-name+1);
1482 }
1483
match_opcode_name(const char * input_name,const char * opcodelist_name,int def_match)1484 int x86asm::match_opcode_name(const char *input_name, const char *opcodelist_name, int def_match)
1485 {
1486 if (opcodelist_name) {
1487 if (*opcodelist_name == '~') opcodelist_name++;
1488 char n1[32], n2[32], n3[32];
1489 pickname(n1, opcodelist_name, 0);
1490 pickname(n2, opcodelist_name, 1);
1491 pickname(n3, opcodelist_name, 2);
1492 switch (opcodelist_name[0]) {
1493 case '|':
1494 case '&':
1495 if (strcmp(n1, input_name)==0) return def_match;
1496 if (strcmp(n2, input_name)==0) return def_match;
1497 if (strcmp(n3, input_name)==0) return def_match;
1498 break;
1499 case '?':
1500 if (strcmp(n1, input_name)==0) return MATCHOPNAME_MATCH_IF_OPSIZE16;
1501 if (strcmp(n2, input_name)==0) return MATCHOPNAME_MATCH_IF_OPSIZE32;
1502 if (strcmp(n3, input_name)==0) return MATCHOPNAME_MATCH_IF_OPSIZE64;
1503 break;
1504 case '*':
1505 if (strcmp(n1, input_name)==0) return MATCHOPNAME_MATCH_IF_ADDRSIZE16;
1506 if (strcmp(n2, input_name)==0) return MATCHOPNAME_MATCH_IF_ADDRSIZE32;
1507 if (strcmp(n3, input_name)==0) return MATCHOPNAME_MATCH_IF_ADDRSIZE64;
1508 break;
1509 default:
1510 if (strcmp(opcodelist_name, input_name)==0) return def_match;
1511 }
1512 }
1513 return MATCHOPNAME_NOMATCH;
1514 }
1515
/* Exchange the values of two chars. */
static void swap(char &a, char &b)
{
	const char saved = b;
	b = a;
	a = saved;
}
1521
/*
 *	Try to assemble `insn' with one opcode-table entry.
 *
 *	If the mnemonic matches the entry's name, match_opcode_final() is
 *	called for the operand size / address size combinations selected
 *	below. The name match result is also OR'ed into namefound.
 */
void x86asm::match_opcode(x86opc_insn *opcode, x86asm_insn *insn, int prefix, byte opcodebyte, int additional_opcode, int def_match)
{
	int n = match_opcode_name(insn->name, opcode->name, def_match);
	namefound |= n;
	if (n == MATCHOPNAME_NOMATCH) return;

	insn->opsizeprefix = X86_PREFIX_NO;
	char opsizes[] = {X86_OPSIZE16, X86_OPSIZE32, X86_OPSIZE64};
	char addrsizes[] = {X86_ADDRSIZE16, X86_ADDRSIZE32, X86_ADDRSIZE64};

	/* put the assembler's own address size first; only the first two entries are tried */
	switch (addrsize) {
	case X86_ADDRSIZE32: swap(addrsizes[0], addrsizes[1]); break;
	case X86_ADDRSIZE64: swap(addrsizes[0], addrsizes[2]); break;
	case X86_ADDRSIZE16:
	case X86_ADDRSIZEUNKNOWN:;
	}

	bool done1 = false;
	int o = 0;
	/*
	 *	check all permutations of opsize and addrsize
	 *	if possible and necessary
	 */
	/* a name bound to one operand size pins `o' and stops after one round */
	switch (n) {
	case MATCHOPNAME_MATCH_IF_OPSIZE16: done1 = true; break;
	case MATCHOPNAME_MATCH_IF_OPSIZE32: o = 1; done1 = true; break;
	case MATCHOPNAME_MATCH_IF_OPSIZE64: o = 2; done1 = true; break;
	}
	for (; o < 3; o++) {
		/* the 64 bit operand size is only tried with 64 bit addressing */
		if (o == 2 && addrsize != X86_ADDRSIZE64) break;
		/* skip operand sizes that contradict the requested prefix rule
		   (`continue' advances the enclosing for loop) */
		switch (def_match) {
		case MATCHOPNAME_MATCH_IF_OPPREFIX:
			if (opsize == X86_OPSIZE16 && o == 0) continue;
			if (opsize == X86_OPSIZE32 && o == 1) continue;
			break;
		case MATCHOPNAME_MATCH_IF_NOOPPREFIX:
			if (opsize == X86_OPSIZE16 && o == 1) continue;
			if (opsize == X86_OPSIZE32 && o == 0) continue;
			break;
		}
		for (int a=0; a < 2; a++) {
			char as = addrsizes[a];
			bool done2 = false;
			/* a name bound to one address size overrides `as' and is tried once */
			switch (n) {
			case MATCHOPNAME_MATCH_IF_ADDRSIZE16: as = X86_ADDRSIZE16; done2 = true; break;
			case MATCHOPNAME_MATCH_IF_ADDRSIZE32: as = X86_ADDRSIZE32; done2 = true; break;
			case MATCHOPNAME_MATCH_IF_ADDRSIZE64: as = X86_ADDRSIZE64; done2 = true; break;
			}
			match_opcode_final(opcode, insn, prefix, opcodebyte, additional_opcode, opsizes[o], as, n);
			if (done2) break;
		}
		if (done1) break;
	}
}
1576
match_vex_opcode(x86opc_vex_insn * opcode,x86asm_insn * insn,byte opcodebyte,int additional_opcode)1577 void x86asm::match_vex_opcode(x86opc_vex_insn *opcode, x86asm_insn *insn, byte opcodebyte, int additional_opcode)
1578 {
1579 int n = match_opcode_name(insn->name, opcode->name, MATCHOPNAME_MATCH_IF_NOOPPREFIX);
1580 namefound |= n;
1581 if (n == MATCHOPNAME_NOMATCH) return;
1582
1583 char addrsizes[] = {X86_ADDRSIZE16, X86_ADDRSIZE32, X86_ADDRSIZE64};
1584
1585 switch (addrsize) {
1586 case X86_ADDRSIZE32: swap(addrsizes[0], addrsizes[1]); break;
1587 case X86_ADDRSIZE64: swap(addrsizes[0], addrsizes[2]); break;
1588 case X86_ADDRSIZE16:
1589 case X86_ADDRSIZEUNKNOWN:;
1590 }
1591
1592 for (int a=0; a < 2; a++) {
1593 char as = addrsizes[a];
1594 match_vex_opcode_final(opcode, insn, opcodebyte, additional_opcode, opsize, as);
1595 }
1596 }
1597
match_opcode_final(x86opc_insn * opcode,x86asm_insn * insn,int prefix,byte opcodebyte,int additional_opcode,int opsize,int addrsize,int match)1598 int x86asm::match_opcode_final(x86opc_insn *opcode, x86asm_insn *insn, int prefix, byte opcodebyte, int additional_opcode, int opsize, int addrsize, int match)
1599 {
1600 switch (match_allops(insn, opcode->op, 4, opsize, addrsize)) {
1601 case MATCHTYPE_NOMATCH:
1602 return false;
1603 case MATCHTYPE_MATCH:
1604 if (match == MATCHOPNAME_MATCH_IF_OPPREFIX) {
1605 insn->opsizeprefix = X86_PREFIX_OPSIZE;
1606 } else if (match == MATCHOPNAME_MATCH_IF_NOOPPREFIX) {
1607 insn->opsizeprefix = X86_PREFIX_NOOPSIZE;
1608 }
1609 break;
1610 case MATCHTYPE_NOOPPREFIX:
1611 if (match == MATCHOPNAME_MATCH_IF_OPPREFIX) return false;
1612 insn->opsizeprefix = X86_PREFIX_NOOPSIZE;
1613 break;
1614 case MATCHTYPE_OPPREFIX:
1615 if (match == MATCHOPNAME_MATCH_IF_NOOPPREFIX) return false;
1616 insn->opsizeprefix = X86_PREFIX_OPSIZE;
1617 break;
1618 }
1619 if (encode_insn(insn, opcode, opcodebyte, additional_opcode, prefix, opsize, addrsize)) {
1620 pushcode();
1621 newcode();
1622 }
1623 return true;
1624 }
1625
match_vex_opcode_final(x86opc_vex_insn * opcode,x86asm_insn * insn,byte opcodebyte,int additional_opcode,int opsize,int addrsize)1626 int x86asm::match_vex_opcode_final(x86opc_vex_insn *opcode, x86asm_insn *insn, byte opcodebyte, int additional_opcode, int opsize, int addrsize)
1627 {
1628 if (match_allops(insn, opcode->op, 5, opsize, addrsize) == MATCHTYPE_NOMATCH) {
1629 return false;
1630 }
1631 if (encode_vex_insn(insn, opcode, opcodebyte, additional_opcode, opsize, addrsize)) {
1632 pushcode();
1633 newcode();
1634 }
1635 return true;
1636 }
1637
match_opcodes(x86opc_insn * opcodes,x86asm_insn * insn,int prefix,int def_match)1638 void x86asm::match_opcodes(x86opc_insn *opcodes, x86asm_insn *insn, int prefix, int def_match)
1639 {
1640 for (int i=0; i < 256; i++) {
1641 if (!opcodes[i].name) {
1642 byte specialtype = opcodes[i].op[0];
1643 if (specialtype == SPECIAL_TYPE_GROUP) {
1644 byte specialdata = opcodes[i].op[1];
1645 x86opc_insn *group = x86_group_insns[specialdata];
1646 for (int g=0; g < 8; g++) {
1647 if (!group[g].name) {
1648 byte special2type = group[g].op[0];
1649 if (special2type == SPECIAL_TYPE_SGROUP) {
1650 byte special2data = group[g].op[1];
1651 x86opc_insn *group = x86_special_group_insns[special2data];
1652 for (int h=0; h < 8; h++) {
1653 match_opcode(&group[h], insn, prefix, i, (h<<3) + g + 0x800, def_match);
1654 }
1655 match_opcode(&group[8], insn, prefix, i, -1, def_match);
1656 }
1657 } else {
1658 match_opcode(&group[g], insn, prefix, i, g, def_match);
1659 }
1660 }
1661 }
1662 } else {
1663 match_opcode(&opcodes[i], insn, prefix, i, -1, def_match);
1664 }
1665 }
1666 }
1667
match_vex_opcodes(x86asm_insn * insn)1668 void x86asm::match_vex_opcodes(x86asm_insn *insn)
1669 {
1670 for (int i=0; i < 256; i++) {
1671 x86opc_vex_insn *opcodes = x86_vex_insns[i];
1672 if (!opcodes) continue;
1673 while (!opcodes->name && opcodes->op[0] == SPECIAL_TYPE_GROUP) {
1674 for (int j=0; j < 8; j++) {
1675 x86opc_vex_insn *group = &x86_group_vex_insns[opcodes->op[1]][j];
1676 if (group->name) match_vex_opcode(group, insn, i, j);
1677 }
1678 opcodes++;
1679 }
1680 while (opcodes->name) {
1681 match_vex_opcode(opcodes, insn, i, -1);
1682 opcodes++;
1683 }
1684 }
1685 }
1686
/*
 *	Try to assemble `insn' as a floating point instruction (prefix
 *	bytes X86ASM_PREFIX_D8 + i, i = 0..7).
 *
 *	Every name comparison result is OR'ed into namefound. Each
 *	successful encoding is stored with pushcode() / newcode().
 */
void x86asm::match_fopcodes(x86asm_insn *insn)
{
	/* try modrm fopcodes */
	for (int i=0; i < 8; i++) {
		for (int j=0; j < 8; j++) {
			int n = match_opcode_name(insn->name, x86_modfloat_group_insns[i][j].name, MATCHOPNAME_MATCH);
			namefound |= n;
			if (n != MATCHOPNAME_NOMATCH) {
				/* two rounds: the assembler's address size, then the alternative */
				int eaddrsize = addrsize;
				for (int k=0; k < 2; k++) {
					if (match_allops(insn, x86_modfloat_group_insns[i][j].op, 4, opsize, eaddrsize)) {
						if (encode_insn(insn, &x86_modfloat_group_insns[i][j], -1, j, X86ASM_PREFIX_D8+i, opsize, eaddrsize)) {
							pushcode();
							newcode();
						}
					}
					/* 64 and 16 switch to 32, 32 switches to 16 */
					switch (eaddrsize) {
					case X86_ADDRSIZE64:
					case X86_ADDRSIZE16: eaddrsize = X86_ADDRSIZE32; break;
					case X86_ADDRSIZE32: eaddrsize = X86_ADDRSIZE16; break;
					}
				}
			}
		}
	}
	/* try the rest */
	for (int i=0; i<8; i++) {
		for (int j=0; j<8; j++) {
			if (x86_float_group_insns[i][j].group == 0) {
				/* single instruction; only the assembler's own address size is tried */
				int n = match_opcode_name(insn->name, x86_float_group_insns[i][j].insn.name, MATCHOPNAME_MATCH);
				namefound |= n;
				if (n != MATCHOPNAME_NOMATCH) {
					if (match_allops(insn, x86_float_group_insns[i][j].insn.op, 4, opsize, addrsize)) {
						if (encode_insn(insn, &x86_float_group_insns[i][j].insn, -1, 0x800 | j, X86ASM_PREFIX_D8+i, opsize, addrsize)) {
							pushcode();
							newcode();
						}
						/* this is the only path that aborts the search on an error */
						if (error) return;
					}
				}
			} else {
				/* group of 8 instructions, again tried with two address sizes */
				x86opc_insn *group=x86_float_group_insns[i][j].group;
				for (int k=0; k < 8; k++) {
					int n = match_opcode_name(insn->name, group[k].name, MATCHOPNAME_MATCH);
					namefound |= n;
					if (n != MATCHOPNAME_NOMATCH) {
						int eaddrsize = addrsize;
						for (int l=0; l < 2; l++) {
							if (match_allops(insn, group[k].op, 4, opsize, eaddrsize)) {
								if (encode_insn(insn, &group[k], -1, 0x800 | k<<3 | j, X86ASM_PREFIX_D8+i, opsize, eaddrsize)) {
									pushcode();
									newcode();
								}
							}
							switch (eaddrsize) {
							case X86_ADDRSIZE64:
							case X86_ADDRSIZE16: eaddrsize = X86_ADDRSIZE32; break;
							case X86_ADDRSIZE32: eaddrsize = X86_ADDRSIZE16; break;
							}
						}
					}
				}
			}
		}
	}
}
1753
opreg(x86_insn_op * op,const char * xop)1754 bool x86asm::opreg(x86_insn_op *op, const char *xop)
1755 {
1756 for (int i=0; i<3; i++) {
1757 for (int j=0; j<8; j++) {
1758 if (x86_regs[i][j] && strcmp(xop, x86_regs[i][j])==0) {
1759 op->type = X86_OPTYPE_REG;
1760 op->size = reg2size[i];
1761 op->reg = j;
1762 return true;
1763 }
1764 }
1765 }
1766 return false;
1767 }
1768
opmmx(x86_insn_op * op,const char * xop)1769 bool x86asm::opmmx(x86_insn_op *op, const char *xop)
1770 {
1771 if (strlen(xop) == 3 && xop[0] == 'm' && xop[1] == 'm'
1772 && xop[2] >= '0' && xop[2] <= '7') {
1773 op->type = X86_OPTYPE_MMX;
1774 op->size = 8;
1775 op->mmx = xop[2] - '0';
1776 return true;
1777 } else {
1778 return false;
1779 }
1780 }
1781
opxmm(x86_insn_op * op,const char * xop)1782 bool x86asm::opxmm(x86_insn_op *op, const char *xop)
1783 {
1784 if (strlen(xop) == 4 && xop[0] == 'x' && xop[1] == 'm' && xop[2] == 'm'
1785 && xop[3] >= '0' && xop[3] <= '7') {
1786 op->type = X86_OPTYPE_XMM;
1787 op->size = 16;
1788 op->xmm = xop[3] - '0';
1789 return true;
1790 } else {
1791 return false;
1792 }
1793 }
1794
opymm(x86_insn_op * op,const char * xop)1795 bool x86asm::opymm(x86_insn_op *op, const char *xop)
1796 {
1797 if (strlen(xop) == 4 && xop[0] == 'y' && xop[1] == 'm' && xop[2] == 'm'
1798 && xop[3] >= '0' && xop[3] <= '7') {
1799 op->type = X86_OPTYPE_YMM;
1800 op->size = 32;
1801 op->xmm = xop[3] - '0';
1802 return true;
1803 } else {
1804 return false;
1805 }
1806 }
1807
opseg(x86_insn_op * op,const char * xop)1808 bool x86asm::opseg(x86_insn_op *op, const char *xop)
1809 {
1810 for (int i=0; i<8; i++) {
1811 if (x86_segs[i] && strcmp(xop, x86_segs[i])==0) {
1812 op->type = X86_OPTYPE_SEG;
1813 op->size = 2;
1814 op->seg = i;
1815 return true;
1816 }
1817 }
1818 return false;
1819 }
1820
/*
 *	Parse `xop' as a far pointer "seg:offset".
 *
 *	Not implemented: always returns false and leaves `op' untouched.
 *	The disabled draft below is kept as a starting point.
 */
bool x86asm::opfarptr(x86_insn_op *op, const char *xop)
{
	return false;
/*
	FIXME:
	uint64 seg, offset;
	char *x = xop;

	if (!fetch_number(&x, &seg)) return false;
	if (*x != ':') return false;
	x++;
	if (!fetch_number(&x, &offset)) return false;
	if (*x) return false;
	op->type = X86_OPTYPE_FARPTR;
	if (offset > 0xffff) op->size=6; else op->size=4;
	op->farptr.seg = seg;
	op->farptr.offset = offset;
	return true;
*/
}
1841
opimm(x86_insn_op * op,const char * xop)1842 bool x86asm::opimm(x86_insn_op *op, const char *xop)
1843 {
1844 uint64 i;
1845 if (!str2int(xop, i)) return false;
1846 op->type = X86_OPTYPE_IMM;
1847 if (i > 0xffffffffULL) {
1848 op->size = 8;
1849 } else if (i > 0xffff) {
1850 op->size = 4;
1851 } else if (i > 0xff) {
1852 op->size = 2;
1853 } else {
1854 op->size = 1;
1855 }
1856 op->imm = i;
1857 return true;
1858 }
1859
opplugimm(x86_insn_op * op,const char * xop)1860 bool x86asm::opplugimm(x86_insn_op *op, const char *xop)
1861 {
1862 uint64 d;
1863 if (imm_eval_proc && imm_eval_proc(imm_eval_context, xop, d)) {
1864 op->type = X86_OPTYPE_IMM;
1865 if (d > 0xffffffff) {
1866 op->size = 8;
1867 } else if (d > 0xffff) {
1868 op->size = 4;
1869 } else if (d > 0xff) {
1870 op->size = 2;
1871 } else {
1872 op->size = 1;
1873 }
1874 op->imm = d;
1875 return true;
1876 }
1877 return false;
1878 }
1879
/*
 *	Parse `s' as a memory operand:
 *
 *	    [<type> ptr] [<seg>:] '[' <terms> ']'
 *
 *	where <terms> is a sum of registers, "reg*number" products and
 *	numbers (the latter may also be subtracted). On success `op' becomes
 *	a memory operand with base, index, scale, displacement, address size
 *	and size filled in. A segment override is stored in
 *	asm_insn->segprefix as soon as it is recognized, even if parsing
 *	fails later on.
 */
bool x86asm::opmem(x86asm_insn *asm_insn, x86_insn_op *op, const char *s)
{
	/* FIXME: dirty implementation! */
	int opsize = 0, hsize = 0;
	bool floatptr = false;
	char token[256];
	const char *sep = "[]()*+-:";

	tok(&s, token, sizeof token, sep);

	static const char *types[] = {"byte", "word", "dword", "pword", "qword", "oword", "xmmword", "ymmword", "single", "double", "extended"};
	static byte type_size[] = {1, 2, 4, 6, 8, 16, 16, 32, 4, 8, 10};
	// typecast
	for (uint i=0; i < sizeof types / sizeof types[0]; i++) {
		if (strcmp(token, types[i]) == 0) {
			hsize = type_size[i];
			/* entries 8 and up ("single", "double", "extended") are float types */
			if (i >= 8) floatptr = true;
			break;
		}
	}
	if (hsize) {
		/* a type name must be followed by "ptr" */
		tok(&s, token, sizeof token, sep);
		if (!(strcmp(token, "ptr") == 0)) return false;
		opsize = hsize;
		tok(&s, token, sizeof token, sep);
	}

	// segprefixes (e.g. fs:)
	for (int i = 0; i < 8; i++) {
		if (x86_segs[i]) {
			if (strcmp(x86_segs[i], token)==0) {
				tok(&s, token, sizeof token, sep);
				if (!(strcmp(token, ":") == 0)) return false;
				/* prefix for segment number i */
				static const int c2p[8] = {X86_PREFIX_ES, X86_PREFIX_CS, X86_PREFIX_SS, X86_PREFIX_DS, X86_PREFIX_FS, X86_PREFIX_GS, 0, 0};
				asm_insn->segprefix = c2p[i];
				tok(&s, token, sizeof token, sep);
				break;
			}
		}
	}

	if (!(strcmp(token, "[") == 0)) return false;

	int scale = 0, index = X86_REG_NO, base = X86_REG_NO;
	uint64 disp = 0;
	int addrsize = X86_ADDRSIZEUNKNOWN;
	int lasttokenreg = X86_REG_NO;
	bool need_rex = false;

	/*
	 * sign: 1 = a term may follow and is added, -1 = a term may follow
	 * and is subtracted, 0 = a term was just read, an operator or ']'
	 * must follow.
	 */
	int sign = 1;
	/* ':' is no longer a separator inside the brackets */
	sep = "[]()*+-";
	while (1) {
cont:
		tok(&s, token, sizeof token, sep);
		if (strcmp(token, "+") == 0) {
			if (!sign) sign = 1;
			continue;
		}
		if (strcmp(token, "-") == 0) {
			if (sign) {
				sign = -sign;
			} else {
				sign = -1;
			}
			continue;
		}
		if (strcmp(token, "]") == 0) {
			/* a pending operator (or an empty "[]") is an error */
			if (sign) return false;
			break;
		}
		if (strcmp(token, "*") == 0) {
			tok(&s, token, sizeof token, sep);
			if (lasttokenreg == X86_REG_NO) {
				/* FIXME: case "imm*reg" not yet supported!
				cleaner implementation needed! */
				return false;
			} else {
				uint64 v;
				if (!str2int(token, v)) return false;
				if (v > 1) {
					if (index == lasttokenreg) {
						/* register already counted once as index */
						scale += v-1;
					} else if (base == lasttokenreg) {
						/* a scaled register has to be the index */
						if (index != X86_REG_NO) return false;
						index = base;
						base = X86_REG_NO;
						scale = v;
					}
				}
			}
			lasttokenreg = X86_REG_NO;
			sign = 0;
			continue;
		}
		/* test if reg */
		for (int i=1; i < 4; i++) {
			for (int j=0; j < 16; j++) {
				if (x86_64regs[i][j] && strcmp(token, x86_64regs[i][j])==0) {
					/* registers 8..15 and register class 3 need 64 bit addressing;
					   otherwise give up on this class (leaves the inner loop only) */
					if (j > 7 || i == 3) {
						if (this->addrsize != X86_ADDRSIZE64) break;
						if (j > 7) need_rex = true;
					}
					/* registers cannot be subtracted */
					if (sign < 0) return false;
					static const byte sizer[] = {X86_ADDRSIZE16, X86_ADDRSIZE32, X86_ADDRSIZE64};
					int caddrsize = sizer[i-1];
					/* all registers of one operand must share an address size */
					if (addrsize == X86_ADDRSIZEUNKNOWN) {
						addrsize = caddrsize;
					} else if (addrsize != caddrsize) {
						return false;
					}
					if (index == j) {
						scale++;
					} else if (base == X86_REG_NO) {
						base = j;
					} else if (index == X86_REG_NO) {
						index = j;
						scale = 1;
					} else if (base == j && scale == 1) {
						/* base seen a second time: it becomes the index with scale 2 */
						int t = index;
						index = base;
						base = t;
						scale = 2;
					} else return false;
					lasttokenreg = j;
					sign = 0;
					goto cont;
				}
			}
		}
		lasttokenreg = X86_REG_NO;

		/* test if number */
		uint64 v;
		if ((imm_eval_proc && imm_eval_proc(imm_eval_context, token, v))
		 || str2int(token, v)) {
			/* two terms in a row without an operator */
			if (!sign) return false;
			if (sign < 0) disp -= v; else disp += v;
			sign = 0;
			continue;
		}
		return false;
	}

	op->type = X86_OPTYPE_MEM;
	op->size = opsize;
	op->mem.base = base;
	op->mem.index = index;
	op->mem.scale = scale;
	op->mem.addrsize = addrsize;
	op->mem.disp = disp;
	op->mem.floatptr = floatptr;
	op->need_rex = need_rex;
	return true;
}
2034
opspecialregs(x86_insn_op * op,const char * xop)2035 bool x86asm::opspecialregs(x86_insn_op *op, const char *xop)
2036 {
2037 char *e;
2038 if (strcmp(xop, "st")==0) {
2039 op->type=X86_OPTYPE_STX;
2040 op->size=10;
2041 op->stx=0;
2042 return true;
2043 } else if (ht_strncmp(xop, "st", 2)==0 && xop[2]=='(' && xop[4]==')') {
2044 int w = strtol(xop+3, &e, 10);
2045 if (e != xop+4 || w > 7) return false;
2046 op->type = X86_OPTYPE_STX;
2047 op->size = 10;
2048 op->stx = w;
2049 return 1;
2050 }
2051
2052 /* FIXME: do we need this?
2053 * strtol sets e to next untranslatable char,
2054 * this case is caught below...
2055 */
2056 if (strlen(xop) != 3) return 0;
2057
2058 int w = strtol(xop+2, &e, 10);
2059 if (*e || w > 7) return 0;
2060 if (ht_strncmp(xop, "cr", 2) == 0) {
2061 op->type = X86_OPTYPE_CRX;
2062 op->size = 4;
2063 op->crx = w;
2064 return true;
2065 } else if (ht_strncmp(xop, "dr", 2) == 0) {
2066 op->type = X86_OPTYPE_DRX;
2067 op->size = 4;
2068 op->drx = w;
2069 return true;
2070 }
2071 return false;
2072 }
2073
translate_str(asm_insn * asm_insn,const char * s)2074 bool x86asm::translate_str(asm_insn *asm_insn, const char *s)
2075 {
2076 x86asm_insn *insn=(x86asm_insn*)asm_insn;
2077 char *opp[5], op[5][256];
2078 opp[0]=op[0];
2079 opp[1]=op[1];
2080 opp[2]=op[2];
2081 opp[3]=op[3];
2082 opp[4]=op[4];
2083 for (int i=0; i<5; i++) {
2084 insn->op[i].need_rex = insn->op[i].forbid_rex = false;
2085 insn->op[i].type = X86_OPTYPE_EMPTY;
2086 }
2087
2088 insn->lockprefix = X86_PREFIX_NO;
2089 insn->repprefix = X86_PREFIX_NO;
2090 insn->segprefix = X86_PREFIX_NO;
2091 insn->opsizeprefix = X86_PREFIX_NO;
2092
2093 const char *p = s, *a, *b;
2094
2095 /* prefixes */
2096 whitespaces(p);
2097 a=p;
2098 non_whitespaces(p);
2099 b=p;
2100 if (ht_strncmp(a, "rep", b-a) == 0 || ht_strncmp(a, "repe", b-a) == 0
2101 || ht_strncmp(a, "repz", b-a) == 0) {
2102 insn->repprefix=X86_PREFIX_REPZ;
2103 s = p;
2104 } else if (ht_strncmp(a, "repne", b-a) == 0 || ht_strncmp(a, "repnz", b-a) == 0) {
2105 insn->repprefix=X86_PREFIX_REPNZ;
2106 s = p;
2107 } else if (ht_strncmp(a, "lock", b-a) == 0) {
2108 insn->lockprefix=X86_PREFIX_LOCK;
2109 s = p;
2110 }
2111
2112 /**/
2113 splitstr(s, insn->n, sizeof insn->n, (char**)opp, 256);
2114 insn->name = insn->n;
2115 for (int i=0; i<5; i++) {
2116 if (!*op[i]) break;
2117
2118 if (!(opplugimm(&insn->op[i], op[i])
2119 || opreg(&insn->op[i], op[i])
2120 || opmmx(&insn->op[i], op[i])
2121 || opxmm(&insn->op[i], op[i])
2122 || opymm(&insn->op[i], op[i])
2123 || opfarptr(&insn->op[i], op[i])
2124 || opimm(&insn->op[i], op[i])
2125 || opseg(&insn->op[i], op[i])
2126 || opmem(insn, &insn->op[i], op[i])
2127 || opspecialregs(&insn->op[i], op[i]))) {
2128 set_error_msg(X86ASM_ERRMSG_UNKNOWN_SYMBOL, op[i]);
2129 return false;
2130 }
2131 }
2132 return true;
2133 }
2134
simmsize(uint64 imm,int immsize)2135 int x86asm::simmsize(uint64 imm, int immsize)
2136 {
2137 switch (immsize) {
2138 case 1:
2139 if (imm <= 0xff) return 1;
2140 break;
2141 case 2:
2142 if (imm <= 0xffff) imm = sint64(sint16(imm));
2143 break;
2144 case 4:
2145 if (imm <= 0xffffffff) imm = sint64(sint32(imm));
2146 break;
2147 }
2148 if (imm >= 0xffffffffffffff80ULL || imm < 0x80) return 1;
2149 if (imm >= 0xffffffffffff8000ULL || imm < 0x8000) return 2;
2150 if (imm >= 0xffffffff80000000ULL || imm < 0x80000000) return 4;
2151 return 8;
2152 }
2153
splitstr(const char * s,char * name,int size,char ** op,int opsize)2154 void x86asm::splitstr(const char *s, char *name, int size, char **op, int opsize)
2155 {
2156 const char *a, *b;
2157 bool wantbreak = false;
2158 *name=0;
2159 *op[0]=0;
2160 *op[1]=0;
2161 *op[2]=0;
2162 *op[3]=0;
2163 *op[4]=0;
2164 /* find name */
2165 whitespaces(s);
2166 a = s;
2167 non_whitespaces(s);
2168 b = s;
2169 ht_strlcpy(name, a, MIN(b-a+1, size));
2170 /* find ops */
2171 for (int i = 0; i < 5; i++) {
2172 whitespaces(s);
2173 if (!*s) break;
2174 a = s;
2175 waitforchar(s, ',');
2176 while (is_whitespace(s[-1])) s--;
2177 if (!*s) wantbreak = true;
2178 b = s;
2179 whitespaces(s);
2180 if (!*s) wantbreak = true;
2181 ht_strlcpy(op[i], a, MIN(b-a+1, opsize));
2182 whitespaces(s);
2183 if (wantbreak || *s != ',') break;
2184 s++;
2185 }
2186 }
2187
tok(const char ** s,char * res,int reslen,const char * sep)2188 void x86asm::tok(const char **s, char *res, int reslen, const char *sep)
2189 {
2190 if (reslen <= 0) return;
2191 whitespaces(*s);
2192 if (strchr(sep, **s)) {
2193 if (reslen > 0) *res++ = *((*s)++);
2194 } else {
2195 while (reslen > 1) {
2196 *res++ = *((*s)++);
2197 reslen--;
2198 if (**s == ' ' || **s == '\t') break;
2199 if (strchr(sep, **s)) break;
2200 }
2201 }
2202 *res = 0;
2203 }
2204
/************************************************************************
 *	x86_64asm
 *
 *	Opcode table used by all x86_64asm instances. It is allocated and
 *	filled by the first call to prepInsns().
 */
x86opc_insn (*x86_64asm::x86_64_insns)[256];
2209
/*
 *	Create an assembler with a 32 bit default operand size and 64 bit
 *	address size, and install the 64 bit opcode table.
 */
x86_64asm::x86_64asm()
	: x86asm(X86_OPSIZE32, X86_ADDRSIZE64)
{
	prepInsns();
}
2215
prepInsns()2216 void x86_64asm::prepInsns()
2217 {
2218 if (!x86_64_insns) {
2219 x86_64_insns = ht_malloc(sizeof *x86_64_insns);
2220 memcpy(x86_64_insns, x86_32_insns, sizeof x86_32_insns);
2221
2222 int i = 0;
2223 while (x86_64_insn_patches[i].opc != -1) {
2224 (*x86_64_insns)[x86_64_insn_patches[i].opc] = x86_64_insn_patches[i].insn;
2225 i++;
2226 }
2227 }
2228 x86_insns = x86_64_insns;
2229 }
2230
createCompatibleDisassembler()2231 x86dis *x86_64asm::createCompatibleDisassembler()
2232 {
2233 return new x86_64dis();
2234 }
2235
opreg(x86_insn_op * op,const char * xop)2236 bool x86_64asm::opreg(x86_insn_op *op, const char *xop)
2237 {
2238 for (int i=0; i < 4; i++) {
2239 for (int j=0; j < 16; j++) {
2240 if (x86_64regs[i][j] && strcmp(xop, x86_64regs[i][j])==0) {
2241 op->type = X86_OPTYPE_REG;
2242 op->size = reg2size[i];
2243 op->reg = j;
2244 if (j > 7 || (i == 0 && j > 3)) {
2245 op->need_rex = true;
2246 }
2247 return true;
2248 }
2249 }
2250 }
2251 // check for legacy ah, ch, dh, bh
2252 for (int j=4; j < 8; j++) {
2253 if (x86_regs[0][j] && strcmp(xop, x86_regs[0][j])==0) {
2254 op->type = X86_OPTYPE_REG;
2255 op->size = reg2size[0];
2256 op->reg = j;
2257 op->forbid_rex = true;
2258 return true;
2259 }
2260 }
2261 return false;
2262 }
2263
opxmm(x86_insn_op * op,const char * xop)2264 bool x86_64asm::opxmm(x86_insn_op *op, const char *xop)
2265 {
2266 int slen = strlen(xop);
2267 if ((slen == 4 || slen == 5) && xop[0] == 'x' && xop[1] == 'm' && xop[2] == 'm'
2268 && xop[3] >= '0' && xop[3] <= '9') {
2269 int x = xop[3] - '0';
2270 if (slen == 5) {
2271 if (xop[4] < '0' || xop[4] > '9') return false;
2272 x *= 10;
2273 x += xop[4] - '0';
2274 if (x > 15) return false;
2275 }
2276 op->type = X86_OPTYPE_XMM;
2277 op->size = 16;
2278 op->xmm = x;
2279 if (x > 7) op->need_rex = true;
2280 return true;
2281 } else {
2282 return false;
2283 }
2284 }
2285
opymm(x86_insn_op * op,const char * xop)2286 bool x86_64asm::opymm(x86_insn_op *op, const char *xop)
2287 {
2288 int slen = strlen(xop);
2289 if ((slen == 4 || slen == 5) && xop[0] == 'y' && xop[1] == 'm' && xop[2] == 'm'
2290 && xop[3] >= '0' && xop[3] <= '9') {
2291 int x = xop[3] - '0';
2292 if (slen == 5) {
2293 if (xop[4] < '0' || xop[4] > '9') return false;
2294 x *= 10;
2295 x += xop[4] - '0';
2296 if (x > 15) return false;
2297 }
2298 op->type = X86_OPTYPE_YMM;
2299 op->size = 32;
2300 op->xmm = x;
2301 if (x > 7) op->need_rex = true;
2302 return true;
2303 } else {
2304 return false;
2305 }
2306 }
2307