1 /*BEGIN_LEGAL
2
3 Copyright (c) 2018 Intel Corporation
4
5 Licensed under the Apache License, Version 2.0 (the "License");
6 you may not use this file except in compliance with the License.
7 You may obtain a copy of the License at
8
9 http://www.apache.org/licenses/LICENSE-2.0
10
11 Unless required by applicable law or agreed to in writing, software
12 distributed under the License is distributed on an "AS IS" BASIS,
13 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 See the License for the specific language governing permissions and
15 limitations under the License.
16
17 END_LEGAL */
18 /// @file xed-ild.c
19 /// instruction length decoder
20
21 /*
22 FIXME:
23
24 need these opcode/mode/prefix based tables:
25 has_modrm (boolean)
26 disp_bytes = 1,2,4,8 bytes
27 imm_bytes = 1,2,4,8 bytes
28
29 >90% instructions have MODRM.
30
31 Key on the MOD pattern prebinding
32
33 >90% of the displacements come from the MODRM.MOD byte processing.
34
35 Some come from the pattern:
36 Nonterminals: BRDISP8, BRDISP32, MEMDISPv, BRDISPz
37
38 >90% of the are 1B and come from using map3.
39
40 xed grammar has UIMM32, UIMM16, UIMM8,UIMM8_1, SIMM8, SIMMz. The
41 signed/unsigned should move to an attribute or the xed_inst_t. The
42 UIMM32 is used on AMD XOP instructions.
43
44 uimm16: opcodes 9A, C2, C8, CA, EA
45
46 */
47
48 /* FIXME:we might have invalid map (TNI maps) - in this case
49 * we should check for it before looking up in tables for
50 * modrm/imm/disp/static decoding
51 * Also invalid map value cannot be 0xFF - we allocate 3 bits
52 * for MAP operand in key for static lookup.
53 */
54
55 #include "xed-internal-header.h"
56 #include "xed-ild.h"
57 #include "xed-util-private.h"
58 #include <string.h> // strcmp
59
60 #include "xed-ild-modrm.h"
61 #include "xed-ild-disp-bytes.h"
62 #include "xed-ild-imm-bytes.h"
63 #include "xed-operand-accessors.h"
64
65
66
xed3_mode_64b(xed_decoded_inst_t * d)67 static XED_INLINE int xed3_mode_64b(xed_decoded_inst_t* d) {
68 return (xed3_operand_get_mode(d) == XED_GRAMMAR_MODE_64);
69 }
70
71 /*
72 * The scanners cannot return arbitrarily. They MUST return by calling the
73 * next scanner.
74 */
75
76
77 static void init_has_disp_regular_table(void);
78 static void init_eamode_table(void);
79 static void init_has_sib_table(void);
80 static void set_has_modrm(xed_decoded_inst_t* d);
81
82
83
/* Record the branch-hint operand for a 0x2E or 0x3E prefix byte.
 * Callers must only pass one of those two values; anything else asserts. */
static void set_hint(xed_uint8_t b, xed_decoded_inst_t* d){
    if (b == 0x2e) {
        xed3_operand_set_hint(d, 1);
    }
    else if (b == 0x3e) {
        xed3_operand_set_hint(d, 2);
    }
    else {
        xed_assert(0);
    }
}
96
97 // conservative filter table for fast prefix checking
98 // 2014-07-30:
99 // Timing perftest: 2-6% gain
100 // Without the filter:
101 // Average: 384.08s Minimum: 352.74s
102 // With the filter:
103 // Average: 362.97s Minimum: 346.26s
104 // Could use 2x the space and the 64b mode thing to pick the right table.
105 // That would speed up 32b prefix decodes.
106
107 #define XED_PREFIX_TABLE_SIZE 8
108 static xed_uint32_t prefix_table[XED_PREFIX_TABLE_SIZE]; // 32B=256b 32*8=2^5*2^3
109
/* Mark byte value 'a' as a possible prefix in the 256-bit filter table. */
static void set_prefix_table_bit(xed_uint8_t a)
{
    xed_uint32_t word_index = a >> 5;    // which 32-bit chunk
    xed_uint32_t bit_index  = a & 0x1F;  // which bit within the chunk
    prefix_table[word_index] |= ((xed_uint32_t)1 << bit_index);
}
116
get_prefix_table_bit(xed_uint8_t a)117 static XED_INLINE xed_uint_t get_prefix_table_bit(xed_uint8_t a)
118 {
119 // return 1 if the bit is set in the table
120 xed_uint32_t x = a >> 5;
121 xed_uint32_t y = a & 0x1F;
122 return (prefix_table[x] >> y ) & 1;
123 }
124
125 static void init_prefix_table(void);
init_prefix_table(void)126 static void init_prefix_table(void)
127 {
128 int i;
129 static xed_uint8_t legacy_prefixes[] = {
130 0xF0, // lock
131 0x66, // osz
132 0x67, // asz
133
134 0xF2, 0xF3, // rep/repne
135
136 0x2E, 0x3E, // 6 segment prefixes
137 0x26, 0x36,
138 0x64, 0x65,
139
140 0 // sentinel
141 };
142
143 for (i=0;i<XED_PREFIX_TABLE_SIZE;i++)
144 prefix_table[i]=0;
145
146 for (i=0;legacy_prefixes[i];i++)
147 set_prefix_table_bit(legacy_prefixes[i]);
148
149 // add the rex prefixes even for 32b mode
150 for(i=0x40;i<0x50;i++)
151 set_prefix_table_bit(XED_CAST(xed_uint8_t,i));
152 }
153
too_short(xed_decoded_inst_t * d)154 static void XED_NOINLINE too_short(xed_decoded_inst_t* d)
155 {
156 xed3_operand_set_out_of_bytes(d, 1);
157 if ( xed3_operand_get_max_bytes(d) >= XED_MAX_INSTRUCTION_BYTES)
158 xed3_operand_set_error(d,XED_ERROR_INSTR_TOO_LONG);
159 else
160 xed3_operand_set_error(d,XED_ERROR_BUFFER_TOO_SHORT);
161 }
162
/* Record a bad/unsupported opcode-map error and force the MAP operand
 * to the invalid sentinel so later table lookups do not use it. */
static void XED_NOINLINE bad_map(xed_decoded_inst_t* d)
{
    xed3_operand_set_map(d,XED_ILD_MAP_INVALID);
    xed3_operand_set_error(d,XED_ERROR_BAD_MAP);
}
168
169 #if defined(XED_SUPPORTS_AVX512)
/* Record a bad EVEX V' (v4) field decode error. */
static void XED_NOINLINE bad_v4(xed_decoded_inst_t* d)
{
    xed3_operand_set_error(d,XED_ERROR_BAD_EVEX_V_PRIME);
}
/* Record an error for EVEX.z set while no masking register is selected. */
static void XED_NOINLINE bad_z_aaa(xed_decoded_inst_t* d)
{
    xed3_operand_set_error(d,XED_ERROR_BAD_EVEX_Z_NO_MASKING);
}
178 #endif
179
/* Consume all legacy prefixes and (in 64b mode) REX prefixes starting at
 * the current decode position. Records per-prefix operands (osz, asz,
 * lock, seg override, F2/F3 order), prefix counts, and the REX bits.
 * Only a REX that is the last prefix before the opcode takes effect:
 * any legacy prefix after it clears the pending 'rex' value.
 * On exit either the length points at the first non-prefix byte, or
 * too_short() has flagged an out-of-bytes condition. */
static void prefix_scanner(xed_decoded_inst_t* d)
{
    xed_uint8_t max_bytes = xed3_operand_get_max_bytes(d);
    unsigned char length = xed_decoded_inst_get_length(d);
    xed_uint8_t nprefixes = 0;
    xed_uint8_t nseg_prefixes = 0;
    xed_uint8_t nrexes = 0;
    unsigned char rex = 0;   // last REX byte seen, 0 if none pending

    while(length < max_bytes)
    {
        xed_uint8_t b = xed_decoded_inst_get_byte(d, length);

        // fast check to see if something might be a prefix
        // includes REX prefixes in 32b mode
        if (get_prefix_table_bit(b)==0)
            goto out;

        switch(b) {
          case 0x66:
            xed3_operand_set_osz(d, 1);
            xed3_operand_set_prefix66(d, 1);
            /*ignore possible REX prefix encountered earlier */
            rex = 0;
            break;

          case 0x67:
            xed3_operand_set_asz(d, 1);
            rex = 0;
            break;

          /* segment prefixes */
          case 0x2E:
          case 0x3E:
            set_hint(b,d);
            //INTENTIONAL FALLTHROUGH
          case 0x26:
          case 0x36:
            // CS/DS/ES/SS overrides are ignored (not recorded) in 64b mode
            if (xed3_mode_64b(d)==0)
                xed3_operand_set_ild_seg(d, b);
            nseg_prefixes++;
            /*ignore possible REX prefix encountered earlier */
            rex = 0;

            break;
          case 0x64:
          case 0x65:
            //for 64b mode we are ignoring non valid segment prefixes
            //only FS=0x64 and GS=0x65 are valid for 64b mode
            xed3_operand_set_ild_seg(d, b);

            nseg_prefixes++;
            /*ignore possible REX prefix encountered earlier */
            rex = 0;
            break;

          case 0xF0:
            xed3_operand_set_lock(d, 1);
            rex = 0;
            break;

          case 0xF3:
            // track both the first and the last F2/F3 seen; which one
            // wins as the REP operand is decided below
            xed3_operand_set_ild_f3(d, 1);
            xed3_operand_set_last_f2f3(d, 3);
            if(xed3_operand_get_first_f2f3(d) == 0)
                xed3_operand_set_first_f2f3(d, 3);

            rex = 0;
            break;

          case 0xF2:
            xed3_operand_set_ild_f2(d, 1);
            xed3_operand_set_last_f2f3(d, 2);
            if(xed3_operand_get_first_f2f3(d) == 0)
                xed3_operand_set_first_f2f3(d, 2);

            rex = 0;
            break;

          default:
            /*Take care of REX prefix */
            if (xed3_mode_64b(d) &&
                (b & 0xf0) == 0x40) {
                nrexes++;
                rex = b;
            }
            else
                goto out;
        }
        length++;
        nprefixes++;
    }
out:
    //set counts
    xed_decoded_inst_set_length(d, length);
    xed3_operand_set_nprefixes(d, nprefixes);
    xed3_operand_set_nseg_prefixes(d, nseg_prefixes);
    xed3_operand_set_nrexes(d, nrexes);

    //set REX, REXW, etc. -- only if a REX survived as the last prefix
    if (rex) {
        xed3_operand_set_rexw(d, (rex>>3) & 1);
        xed3_operand_set_rexr(d, (rex>>2) & 1);
        xed3_operand_set_rexx(d, (rex>>1) & 1);
        xed3_operand_set_rexb(d, (rex) & 1);
        xed3_operand_set_rex(d, 1);
    }

    //set REP and REFINING
    if (xed3_operand_get_mode_first_prefix(d))
        xed3_operand_set_rep(d, xed3_operand_get_first_f2f3(d));
    else
        xed3_operand_set_rep(d, xed3_operand_get_last_f2f3(d));

    //set SEG_OVD
    /*FIXME: lookup table for seg_ovd ? */
    /*FIXME: make the grammar use the raw byte value instead of the 1..6
     * recoding */
    switch(xed3_operand_get_ild_seg(d)) {
      case 0x2e:
        xed3_operand_set_seg_ovd(d, 1);
        break;
      case 0x3e:
        xed3_operand_set_seg_ovd(d, 2);
        break;
      case 0x26:
        xed3_operand_set_seg_ovd(d, 3);
        break;
      case 0x64:
        xed3_operand_set_seg_ovd(d, 4);
        break;
      case 0x65:
        xed3_operand_set_seg_ovd(d, 5);
        break;
      case 0x36:
        xed3_operand_set_seg_ovd(d, 6);
        break;
      default:
        break;
    }

    //check max bytes
    if (length >= max_bytes) {
        /* all available length was taken by prefixes, but we for sure need
         * at least one additional byte for an opcode, hence we are out of
         * bytes. */
        too_short(d);
        return;
    }
}
330
331 #if defined(XED_AVX) || defined(XED_SUPPORTS_KNC)
// Recode the 2-bit VEX/EVEX 'pp' field into XED's VEX_PREFIX encoding:
// XED uses 2 for F2 and 3 for F3, while pp encodes 2=F3, 3=F2 -- hence
// the swap table: pp 0->0 (none), 1->1 (66), 2->3 (F3), 3->2 (F2).
static unsigned int vex_prefix_recoding[/*pp*/] = { 0,1,3,2 };
334 #endif
335
336 #if defined(XED_AVX)
337
// Bitfield view of the first C4 (3-byte VEX) payload byte,
// low bits first: mmmmm map selector, then inverted B/X/R bits.
typedef union { // C4 payload 1
    struct {
        xed_uint32_t map:5;    // opcode map selector (mmmmm)
        xed_uint32_t b_inv:1;  // inverted REX.B
        xed_uint32_t x_inv:1;  // inverted REX.X
        xed_uint32_t r_inv:1;  // inverted REX.R
        xed_uint32_t pad:24;
    } s;
    xed_uint32_t u32;
} xed_avx_c4_payload1_t;
348
// Bitfield view of the second C4 (3-byte VEX) payload byte.
typedef union { // C4 payload 2
    struct {
        xed_uint32_t pp:2;      // compressed legacy prefix (none/66/F3/F2)
        xed_uint32_t l:1;       // vector length
        xed_uint32_t vvv210:3;  // low 3 bits of inverted vvvv source
        xed_uint32_t v3:1;      // high bit of inverted vvvv source
        xed_uint32_t w:1;       // REX.W equivalent
        xed_uint32_t pad:24;
    } s;
    xed_uint32_t u32;
} xed_avx_c4_payload2_t;
360
// Bitfield view of the single C5 (2-byte VEX) payload byte.
typedef union { // C5 payload 1
    struct {
        xed_uint32_t pp:2;      // compressed legacy prefix
        xed_uint32_t l:1;       // vector length
        xed_uint32_t vvv210:3;  // low 3 bits of inverted vvvv source
        xed_uint32_t v3:1;      // high bit of inverted vvvv source
        xed_uint32_t r_inv:1;   // inverted REX.R (only REX bit C5 carries)
        xed_uint32_t pad:24;
    } s;
    xed_uint32_t u32;
} xed_avx_c5_payload_t;
372
373 static void evex_vex_opcode_scanner(xed_decoded_inst_t* d); //prototype
374
/* Scan a 3-byte (C4) VEX prefix. In 16/32b modes C4 is only a VEX escape
 * when the next byte's top two bits are 0b11 (otherwise it is the LES
 * opcode and we fall through to the next scanner). On success this
 * captures the REX-equivalent bits, vvvv, vl, pp and map operands, sets
 * vexvalid=1, consumes the two payload bytes, and hands off to the
 * VEX/EVEX opcode scanner. */
static void vex_c4_scanner(xed_decoded_inst_t* d)
{
    /* assumption: length < max_bytes
     * This is checked in prefix_scanner.
     * If any other scanner is added before vex_scanner, this condition
     * should be preserved.
     * FIXME: check length < max_bytes here anyway? This will be less
     * error-prone, but that's an additional non-necessary branch.
     */
    xed_uint8_t max_bytes = xed3_operand_get_max_bytes(d);
    unsigned char length = xed_decoded_inst_get_length(d);
    if (xed3_mode_64b(d)) {
        length++; /* eat the c4/c5 */
    }
    else if (length+1 < max_bytes) {
        xed_uint8_t n = xed_decoded_inst_get_byte(d, length+1);
        /* in 16/32b modes, the MODRM.MOD field MUST be 0b11 */
        if ((n&0xC0) == 0xC0) {
            length++; /* eat the c4/c5 */
        }
        else {
            /* A little optimization:
             * this is not a vex prefix, we can proceed to
             * next scanner */
            return;
        }
    }
    else { /* don't have enough bytes to check if it's vex prefix,
            * we are out of bytes */
        too_short(d);
        return ;
    }

    /* pointing at first payload byte. we want to make sure, that we have
     * additional 2 bytes available for reading - for 2nd vex c4 payload
     * byte and opcode */
    if (length + 2 < max_bytes) {
        xed_avx_c4_payload1_t c4byte1;
        xed_avx_c4_payload2_t c4byte2;

        c4byte1.u32 = xed_decoded_inst_get_byte(d, length);
        c4byte2.u32 = xed_decoded_inst_get_byte(d, length + 1);

        // the payload stores B/X/R inverted; un-invert while storing.
        // these 2 are guaranteed to be 1 in 16/32b mode by above check
        xed3_operand_set_rexr(d, ~c4byte1.s.r_inv&1);
        xed3_operand_set_rexx(d, ~c4byte1.s.x_inv&1);

        // REXB only takes effect in 64b mode
        xed3_operand_set_rexb(d, (xed3_mode_64b(d) & ~c4byte1.s.b_inv)&1);

        xed3_operand_set_rexw(d, c4byte2.s.w);

        xed3_operand_set_vexdest3(d, c4byte2.s.v3);
        xed3_operand_set_vexdest210(d, c4byte2.s.vvv210);

        xed3_operand_set_vl(d, c4byte2.s.l);

        xed3_operand_set_vex_prefix(d, vex_prefix_recoding[c4byte2.s.pp]);

        xed3_operand_set_map(d,c4byte1.s.map);

        // FIXME: 2017-03-03 this masking of the VEX map with 0x3 an attempt
        // at matching an undocumented implementation convention that can and
        // most likely will change as architectural map usage evolves.
        if ((c4byte1.s.map & 0x3) == XED_ILD_MAP3)
            xed3_operand_set_imm_width(d, bytes2bits(1));

        // this is a success indicator for downstream decoding
        xed3_operand_set_vexvalid(d, 1); // AVX1/2

        length += 2; /* eat the c4 vex 2B payload */
        xed_decoded_inst_set_length(d, length);

        evex_vex_opcode_scanner(d);
        return;
    }
    else {
        /* We don't have 3 bytes available for reading, but we for sure
         * need to read them - for 2 vex payload bytes and opcode byte,
         * hence we are out of bytes.
         */
        xed_decoded_inst_set_length(d, length);
        too_short(d);
        return;
    }
}
460
/* Scan a 2-byte (C5) VEX prefix. In 16/32b modes C5 is only a VEX escape
 * when the next byte's top two bits are 0b11 (otherwise it is the LDS
 * opcode and we fall through). C5 implies map 1 (0F) and carries only the
 * inverted R bit plus vvvv/vl/pp; on success it sets vexvalid=1, consumes
 * the payload byte and hands off to the VEX/EVEX opcode scanner. */
static void vex_c5_scanner(xed_decoded_inst_t* d)
{
    /* assumption: length < max_bytes
     * This is checked in prefix_scanner.
     * If any other scanner is added before vex_scanner, this condition
     * should be preserved.
     * FIXME: check length < max_bytes here anyway? This will be less
     * error-prone, but that's an additional non-necessary branch.
     */
    xed_uint8_t max_bytes = xed3_operand_get_max_bytes(d);
    unsigned char length = xed_decoded_inst_get_length(d);
    if (xed3_mode_64b(d))
    {
        length++; /* eat the c4/c5 */
    }
    else if (length + 1 < max_bytes)
    {
        xed_uint8_t n = xed_decoded_inst_get_byte(d, length+1);
        /* in 16/32b modes, the MODRM.MOD field MUST be 0b11 */
        if ((n&0xC0) == 0xC0)
        {
            length++; /* eat the c4/c5 */
        }
        else
        {
            /* A little optimization:
             * this is not a vex prefix, we can proceed to
             * next scanner */
            return;
        }
    }
    else
    { /* don't have enough bytes to check if it's vex prefix,
       * we are out of bytes */
        too_short(d);
        return ;
    }


    /* pointing at vex c5 payload byte. we want to make sure, that we have
     * additional 2 bytes available for reading - for vex payload byte and
     * opcode */
    if (length + 1 < max_bytes) {
        xed_avx_c5_payload_t c5byte1;
        c5byte1.u32 = xed_decoded_inst_get_byte(d, length);

        // payload stores R inverted; un-invert while storing
        xed3_operand_set_rexr(d, ~c5byte1.s.r_inv&1);
        xed3_operand_set_vexdest3(d, c5byte1.s.v3);
        xed3_operand_set_vexdest210(d, c5byte1.s.vvv210);

        xed3_operand_set_vl(d, c5byte1.s.l);
        xed3_operand_set_vex_prefix(d, vex_prefix_recoding[c5byte1.s.pp]);

        /* MAP is a special case - although it is a derived operand in
         * newvex_prexix(), we need to set it here, because we use map
         * later in ILD - for modrm, imm and disp
         */
        xed3_operand_set_map(d, XED_ILD_MAP1);

        // this is a success indicator for downstream decoding
        xed3_operand_set_vexvalid(d, 1); // AVX1/2

        length++; /* eat the vex opcode payload */
        xed_decoded_inst_set_length(d, length);

        evex_vex_opcode_scanner(d); //eats opcode byte
        return;
    }
    else {
        /* We don't have 2 bytes available for reading, but we need to read
         * them - for vex payload byte and opcode bytes, hence we are out
         * of bytes.
         */
        xed_decoded_inst_set_length(d, length);
        too_short(d);
        return ;
    }
}
539
540
541 #if defined(XED_AMD_ENABLED)
542
get_modrm_reg_field(xed_uint8_t b)543 static XED_INLINE xed_uint_t get_modrm_reg_field(xed_uint8_t b) {
544 return (b & 0x38) >> 3;
545 }
546
/* Scan an AMD XOP prefix (0x8F escape). 0x8F is only an XOP escape when
 * the next byte's MODRM.REG field is nonzero (REG==0 is the POP Ev
 * opcode, so we fall through to the next scanner). The two payload bytes
 * share the C4 VEX payload layout; the map field selects XOP map 8/9/A
 * and also fixes the immediate width. Sets vexvalid=3 on success and
 * reuses the VEX opcode scanner. */
static void xop_scanner(xed_decoded_inst_t* d)
{
    /* assumption: length < max_bytes
     * This is checked in prefix_scanner.
     * If any other scanner is added before vex_scanner, this condition
     * should be preserved.
     * FIXME: check length < max_bytes here anyway? This will be less
     * error-prone, but that's an additional non-necessary branch.
     */

    /* we don't need to check (d->length < d->max_bytes) because
     * it was already checked in previous scanner (prefix_scanner).
     */
    xed_uint8_t max_bytes = xed3_operand_get_max_bytes(d);
    unsigned char length = xed_decoded_inst_get_length(d);

    if (length + 1 < max_bytes) {
        xed_uint8_t n = xed_decoded_inst_get_byte(d, length+1);
        /* in all modes, the MODRM.REG field MUST NOT be 0b000.
           mm-rrr-nnn -> mmrr_rnnn
        */

        if ( get_modrm_reg_field(n) != 0 ) {
            length++; /* eat the 0x8f */
        }
        else {
            /* A little optimization: this is not an xop prefix, we can
             * proceed to next scanner */
            return;
        }
    }
    else {
        /* don't have enough bytes to check if it's an xop prefix, we
         * are out of bytes */
        too_short(d);
        return ;
    }

    /* pointing at the first xop payload byte. we want to make sure, that
     * we have additional 2 bytes available for reading - for 2nd xop payload
     * byte and opcode */
    if (length + 2 < max_bytes)
    {
        xed_avx_c4_payload1_t xop_byte1;
        xed_avx_c4_payload2_t xop_byte2;
        xed_uint8_t map;
        xop_byte1.u32 = xed_decoded_inst_get_byte(d, length);
        xop_byte2.u32 = xed_decoded_inst_get_byte(d, length + 1);

        /* XOP maps: 8 has a 1-byte imm, 9 has none, A has a 4-byte imm;
         * anything else is a decode error. */
        map = xop_byte1.s.map;
        if (map == 0x9) {
            xed3_operand_set_map(d,XED_ILD_MAP_XOP9);
            xed3_operand_set_imm_width(d, 0); //bits
        }
        else if (map == 0x8){
            xed3_operand_set_map(d,XED_ILD_MAP_XOP8);
            xed3_operand_set_imm_width(d, bytes2bits(1));
        }
        else if (map == 0xA){
            xed3_operand_set_map(d,XED_ILD_MAP_XOPA);
            xed3_operand_set_imm_width(d, bytes2bits(4));
        }
        else
            bad_map(d);


        // payload stores R/X/B inverted; un-invert while storing
        xed3_operand_set_rexr(d, ~xop_byte1.s.r_inv&1);
        xed3_operand_set_rexx(d, ~xop_byte1.s.x_inv&1);
        xed3_operand_set_rexb(d, (xed3_mode_64b(d) & ~xop_byte1.s.b_inv)&1);

        xed3_operand_set_rexw(d, xop_byte2.s.w);

        xed3_operand_set_vexdest3(d, xop_byte2.s.v3);
        xed3_operand_set_vexdest210(d, xop_byte2.s.vvv210);

        xed3_operand_set_vl(d, xop_byte2.s.l);
        xed3_operand_set_vex_prefix(d, vex_prefix_recoding[xop_byte2.s.pp]);

        xed3_operand_set_vexvalid(d, 3);

        length += 2; /* eat the 8f xop 2B payload */
        /* FIXME: too hardcoded? maybe define graph data structure?*/
        /* using the VEX opcode scanner for xop opcodes too. */
        xed_decoded_inst_set_length(d, length);
        evex_vex_opcode_scanner(d);
        return;
    }
    else {
        /* We don't have 3 bytes available for reading, but we for sure
         * need to read them - for 2 vex payload bytes and opcode byte,
         * hence we are out of bytes.
         */
        xed_decoded_inst_set_length(d, length);
        too_short(d);
        return;
    }
}
644 #endif
645 #endif
646
647 #if defined(XED_AVX)
648
649 # if defined(XED_AMD_ENABLED)
chip_is_intel_specific(xed_decoded_inst_t * d)650 static XED_INLINE xed_uint_t chip_is_intel_specific(xed_decoded_inst_t* d)
651 {
652 xed_chip_enum_t chip = xed_decoded_inst_get_input_chip(d);
653 if (chip == XED_CHIP_INVALID ||
654 chip == XED_CHIP_ALL ||
655 chip == XED_CHIP_AMD)
656 return 0;
657 return 1;
658 }
659 # endif
660
661
/* Dispatch on the current byte to the AVX C4/C5 VEX prefix scanners and,
 * when AMD support is enabled and no Intel-specific chip was requested,
 * the AMD XOP 0x8F prefix scanner. Any other byte falls through to the
 * next scanner unchanged. */
static void vex_scanner(xed_decoded_inst_t* d)
{
    unsigned char length = xed_decoded_inst_get_length(d);
    xed_uint8_t b = xed_decoded_inst_get_byte(d, length);
    switch(b) {
      case 0xC5:
        if (!xed3_operand_get_out_of_bytes(d))
            vex_c5_scanner(d);
        break;
      case 0xC4:
        if (!xed3_operand_get_out_of_bytes(d))
            vex_c4_scanner(d);
        break;
#if defined(XED_AMD_ENABLED)
      case 0x8f:
        if (chip_is_intel_specific(d)==0 &&
            !xed3_operand_get_out_of_bytes(d))
            xop_scanner(d);
        break;
#endif
      default:
        break;
    }
}
686 #endif
687
/* Consume the byte at the current position as the nominal opcode and
 * capture its low 3 bits as SRM (partial-opcode instructions encode a
 * register there). Flags out-of-bytes if no byte remains. */
static void get_next_as_opcode(xed_decoded_inst_t* d) {
    unsigned char length = xed_decoded_inst_get_length(d);
    if (length < xed3_operand_get_max_bytes(d)) {
        xed_uint8_t b = xed_decoded_inst_get_byte(d, length);
        xed3_operand_set_nominal_opcode(d, b);
        xed_decoded_inst_inc_length(d);
        //set SRM (partial opcode instructions need it)
        xed3_operand_set_srm(d, xed_modrm_rm(b));
    }
    else {
        too_short(d);
    }
}
701
702
703 // has_disp_regular[eamode][modrm.mod][modrm.rm]
704 static xed_uint8_t has_disp_regular[3][4][8];
705
init_has_disp_regular_table(void)706 static void init_has_disp_regular_table(void) {
707 xed_uint8_t eamode;
708 xed_uint8_t rm;
709 xed_uint8_t mod;
710
711 for (eamode = 0; eamode <3; eamode++)
712 for (mod=0; mod < 4; mod++)
713 for (rm=0; rm<8; rm++)
714 has_disp_regular[eamode][mod][rm] = 0;
715
716 //fill the eamode16
717 has_disp_regular[0][0][6] = 2;
718 for (rm = 0; rm < 8; rm++) {
719 for (mod = 1; mod <= 2; mod++)
720 has_disp_regular[0][mod][rm] = mod;
721 }
722
723 //fill eamode32/64
724 for(eamode = 1; eamode <= 2; eamode++) {
725 for (rm = 0; rm < 8; rm++) {
726 has_disp_regular[eamode][1][rm] = 1;
727 has_disp_regular[eamode][2][rm] = 4;
728 };
729 has_disp_regular[eamode][0][5] = 4;
730
731 }
732 }
733
734 // eamode_table[asz][mmode]
735 static xed_uint8_t eamode_table[2][XED_GRAMMAR_MODE_64+1];
736
init_eamode_table(void)737 static void init_eamode_table(void) {
738 xed_uint8_t mode;
739 xed_uint8_t asz;
740
741 for (asz=0; asz<2; asz++)
742 for (mode=0; mode<XED_GRAMMAR_MODE_64+1; mode++)
743 eamode_table[asz][mode] = 0;
744
745
746 for (mode = XED_GRAMMAR_MODE_16; mode <= XED_GRAMMAR_MODE_64; mode ++) {
747 eamode_table[0][mode] = mode;
748 }
749
750 eamode_table[1][XED_GRAMMAR_MODE_16] = XED_GRAMMAR_MODE_32;
751 eamode_table[1][XED_GRAMMAR_MODE_32] = XED_GRAMMAR_MODE_16;
752 eamode_table[1][XED_GRAMMAR_MODE_64] = XED_GRAMMAR_MODE_32;
753 }
754
755
756 // has_sib_table[eamode][modrm.mod][modrm.rm]
757 static xed_uint8_t has_sib_table[3][4][8];
758
init_has_sib_table(void)759 static void init_has_sib_table(void) {
760 xed_uint8_t eamode;
761 xed_uint8_t mod;
762 xed_uint8_t rm;
763
764 for (eamode = 0; eamode <3; eamode++)
765 for (mod=0; mod < 4; mod++)
766 for (rm=0; rm<8; rm++)
767 has_sib_table[eamode][mod][rm] = 0;
768
769 //for eamode32/64 there is sib byte for mod!=3 and rm==4
770 for(eamode = 1; eamode <= 2; eamode++) {
771 for (mod = 0; mod <= 2; mod++) {
772 has_sib_table[eamode][mod][4] = 1;
773 }
774 }
775 }
776
777
778 #if defined(XED_SUPPORTS_AVX512)
/* Record a bad EVEX.LL (vector-length/rounding) field decode error. */
static void bad_ll(xed_decoded_inst_t* d) {
    xed3_operand_set_error(d,XED_ERROR_BAD_EVEX_LL);
}
782
783
/* EVEX.LL=3 is only legal for register-register forms with the
 * rounding-control bit (bcrc) set; flag everything else as a bad LL. */
static void bad_ll_check(xed_decoded_inst_t* d)
{
    if (xed3_operand_get_llrc(d) == 3) {
        xed_uint_t reg_reg = (xed3_operand_get_mod(d) == 3);
        if (!reg_reg || xed3_operand_get_bcrc(d) == 0)
            bad_ll(d);
    }
}
794 #endif
795
/* Consume the modrm byte when the map/opcode requires one, splitting it
 * into MOD/REG/RM operands, and derive the displacement width and SIB
 * presence from the lookup tables (unless the instruction ignores
 * MODRM.MOD, e.g. MOV to/from CR/DR). Leaves the position unchanged when
 * no modrm is needed. */
static void modrm_scanner(xed_decoded_inst_t* d)
{
    xed_uint8_t b;
    xed_uint8_t has_modrm;
    set_has_modrm(d);

    has_modrm = xed3_operand_get_has_modrm(d);

    if (has_modrm) {
        unsigned char length = xed_decoded_inst_get_length(d);
        if (length < xed3_operand_get_max_bytes(d)) {
            xed_uint8_t eamode;
            xed_uint8_t mod;
            xed_uint8_t rm;

            b = xed_decoded_inst_get_byte(d, length);
            xed3_operand_set_modrm_byte(d, b);
            xed3_operand_set_pos_modrm(d, length);
            xed_decoded_inst_inc_length(d); /* eat modrm */

            mod = xed_modrm_mod(b);
            rm = xed_modrm_rm(b);
            xed3_operand_set_mod(d, mod);
            xed3_operand_set_rm(d, rm);
            xed3_operand_set_reg(d, xed_modrm_reg(b));

#if defined(XED_SUPPORTS_AVX512)
            bad_ll_check(d);
#endif
            /*This checks that we are not in MOV_DR or MOV_CR instructions
              that ignore MODRM.MOD bits and don't have DISP and SIB */
            if (has_modrm != XED_ILD_HASMODRM_IGNORE_MOD) {
                xed_uint8_t asz = xed3_operand_get_asz(d);
                xed_uint8_t mode = xed3_operand_get_mode(d);
                // KW complains here but it is stupid. Code is fine.
                eamode = eamode_table[asz][mode];

                xed_assert(eamode <= 2);

                /* opcode scanner (and prefix scanner) doesn't set
                   disp_bytes, hence we can set it to 0 without worrying about
                   overriding a value that was set earlier. */

                xed3_operand_set_disp_width(
                    d,
                    bytes2bits(has_disp_regular[eamode][mod][rm]));

                /*same with sib, we will not override data set earlier here*/
                xed3_operand_set_has_sib(d, has_sib_table[eamode][mod][rm]);


            }
            return;
        }
        else {
            /*need modrm, but length >= max_bytes, and we are out of bytes*/
            too_short(d);
            return;
        }

    }
    /*no modrm, set RM from nominal_opcode*/
    //rm = xed_modrm_rm(xed3_operand_get_nominal_opcode(d));
    //xed3_operand_set_rm(d, rm);

    /* a little optimization: we don't have modrm and hence don't have sib.
     * Hence we don't need to call sib scanner and can go straight to disp*/
    /*FIXME: Better to call next scanner anyway for better modularity?*/
}
865
/* Consume the SIB byte when the modrm scanner determined one is present,
 * splitting it into scale/index/base operands. SIB base 5 with mod=0
 * implies a 4-byte displacement (the no-base form). No-op when no SIB
 * byte is expected. */
static void sib_scanner(xed_decoded_inst_t* d)
{

    if (xed3_operand_get_has_sib(d)) {
        unsigned char length = xed_decoded_inst_get_length(d);
        if (length < xed3_operand_get_max_bytes(d)) {
            xed_uint8_t b;
            b = xed_decoded_inst_get_byte(d, length);

            xed3_operand_set_pos_sib(d, length);
            xed3_operand_set_sibscale(d, xed_sib_scale(b));
            xed3_operand_set_sibindex(d, xed_sib_index(b));
            xed3_operand_set_sibbase(d, xed_sib_base(b));

            xed_decoded_inst_inc_length(d); /* eat sib */

            if (xed_sib_base(b) == 5) {
                /* other mod values are set by modrm processing */
                if (xed3_operand_get_mod(d) == 0)
                    xed3_operand_set_disp_width(d, bytes2bits(4));
            }
        }
        else { /*has_sib but not enough length -> out of bytes */
            too_short(d);
            return;
        }
    }
}
894
895
896
/* L1 dispatch table, indexed by [map][opcode], of functions that set the
   displacement width for maps 0 and 0F. Probably this table should be
   generated; leaving it here for now. Maybe in one of the following
   commits it will be moved to auto generated code. */
const xed_ild_l1_func_t* disp_bits_2d[XED_ILD_MAP2] = {
    disp_width_map_0x0,
    disp_width_map_0x0F
};
904
/* Resolve the displacement width (for maps 0/0F via the per-opcode L1
 * function table; other maps were resolved earlier), then consume the
 * displacement bytes, recording their position and sign-extended value.
 * Flags out-of-bytes if the displacement does not fit in max_bytes. */
static void disp_scanner(xed_decoded_inst_t* d)
{
    /*                                     0    1  2   3  4   5   6   7  8 */
    static const xed_uint8_t ilog2[] = { 99 , 0, 1, 99, 2, 99, 99, 99, 3 };

    xed_ild_map_enum_t map = (xed_ild_map_enum_t)xed3_operand_get_map(d);
    xed_uint8_t opcode = xed3_operand_get_nominal_opcode(d);
    xed_uint8_t disp_bytes;
    xed_uint8_t length = xed_decoded_inst_get_length(d);
    /*Checked dumped tables of maps 2 ,3 and 3dnow:
      they all have standard displacement resolution, we are not going
      to use their lookup tables*/
    if (map < XED_ILD_MAP2) {
        /*get the L1 function pointer and use it */
        xed_ild_l1_func_t fptr = disp_bits_2d[map][opcode];
        /*most map-opcodes have disp_bytes set in modrm/sib scanners
          for those we have L1 functions that do nothing*/
        if (fptr == 0){
            xed3_operand_set_error(d,XED_ERROR_GENERAL_ERROR);
            return;
        }
        (*fptr)(d);
    }
    /*All other maps should have been set earlier*/
    disp_bytes = bits2bytes(xed3_operand_get_disp_width(d));
    if (disp_bytes) {
        xed_uint8_t max_bytes = xed3_operand_get_max_bytes(d);
        if ((length + disp_bytes) <= max_bytes) {

            //set disp value
            const xed_uint8_t* itext = d->_byte_array._dec;
            const xed_uint8_t* disp_ptr = itext + length;

            /* Copy the (possibly misaligned) displacement out with
             * memcpy instead of dereferencing a cast pointer: the cast
             * form is undefined behavior under C11 strict-aliasing and
             * alignment rules. Byte order still assumes a little-endian
             * host, as the original dereference did.
             * The value sign-extends to 64b via the accessor's
             * signed argument. */
            switch(ilog2[disp_bytes]) {
              case 0: { // 1B=8b. ilog2(1) = 0
                  xed_int8_t byte;
                  memcpy(&byte, disp_ptr, sizeof(byte));
                  xed3_operand_set_disp(d, byte);
                  break;
              }
              case 1: { // 2B=16b ilog2(2) = 1
                  xed_int16_t word;
                  memcpy(&word, disp_ptr, sizeof(word));
                  xed3_operand_set_disp(d, word);
                  break;
              }
              case 2: { // 4B=32b ilog2(4) = 2
                  xed_int32_t dword;
                  memcpy(&dword, disp_ptr, sizeof(dword));
                  xed3_operand_set_disp(d, dword);
                  break;
              }
              case 3: { // 8B=64b ilog2(8) = 3
                  xed_int64_t qword;
                  memcpy(&qword, disp_ptr, sizeof(qword));
                  xed3_operand_set_disp(d, qword);
                  break;
              }
              default:
                  xed_assert(0);
            }

            xed3_operand_set_pos_disp(d, length);
            xed_decoded_inst_set_length(d, length + disp_bytes);
        }
        else {
            too_short(d);
            return;
        }
    }
}
974
975
976
977
978 #if defined(XED_EXTENDED)
979 # include "xed-ild-extension.h"
980 #endif
981
/* Per-opcode has-modrm tables for maps 0 and 0F, indexed by [map][opcode].
   Probably this table should be generated; leaving it here for now.
   Maybe in one of the following commits it will be moved to auto
   generated code. */
const xed_uint8_t* has_modrm_2d[XED_ILD_MAP2] = {
    has_modrm_map_0x0,
    has_modrm_map_0x0F
};
989
/* Decide whether the current map/opcode carries a modrm byte.
 * Maps 0/0F use the per-opcode tables (which may also yield the more
 * complex XED_ILD_HASMODRM_IGNORE_MOD code); every other map -- including
 * 3dnow (XED_ILD_MAPAMD), whose instructions all have modrm -- defaults
 * to 1. The lookup arrays are assumed to cover undefined opcodes too
 * (filled at build time in ild.py). */
static void set_has_modrm(xed_decoded_inst_t* d) {
    xed_ild_map_enum_t map = (xed_ild_map_enum_t)xed3_operand_get_map(d);
    if (map < XED_ILD_MAP2) {
        xed_uint8_t opcode = xed3_operand_get_nominal_opcode(d);
        xed3_operand_set_has_modrm(d, has_modrm_2d[map][opcode]);
    }
    else {
        xed3_operand_set_has_modrm(d, 1);
    }
}
1006
1007
1008
/* L1 dispatch table, indexed by [map][opcode], of functions that set the
   immediate width for maps 0 and 0F. Probably this table should be
   generated; leaving it here for now. Maybe in one of the following
   commits it will be moved to auto generated code. */
const xed_ild_l1_func_t* imm_bits_2d[XED_ILD_MAP2] = {
    imm_width_map_0x0,
    imm_width_map_0x0F
};
1016
/* Resolve the immediate width for the current map/opcode if an earlier
 * scanner (e.g. the VEX/XOP paths) has not already set it. Maps 0/0F use
 * the per-opcode L1 function table; a null entry (illegal map-opcode)
 * records a general error. All other maps were handled earlier. */
static void set_imm_bytes(xed_decoded_inst_t* d) {
    xed_ild_map_enum_t map;
    xed_uint8_t imm_bits = xed3_operand_get_imm_width(d);

    if (imm_bits)
        return;  /* width already established upstream */

    map = (xed_ild_map_enum_t)xed3_operand_get_map(d);
    if (map < XED_ILD_MAP2) {
        xed_uint8_t opcode = xed3_operand_get_nominal_opcode(d);
        xed_ild_l1_func_t fptr = imm_bits_2d[map][opcode];
        if (fptr == 0){
            xed3_operand_set_error(d,XED_ERROR_GENERAL_ERROR);
            return;
        }
        (*fptr)(d);
    }
}
1038
1039 ////////////////////////////////////////////////////////////////////////////////
1040
1041 #if !defined(XED_SUPPORTS_AVX512) && !defined(XED_SUPPORTS_KNC)
/* Consume the immediate byte(s): resolve the width via set_imm_bytes(),
 * handle the AMD 3dnow case where the real opcode trails as a pseudo
 * immediate, then record positions and the raw uimm0/uimm1 values.
 * NOTE(review): the multi-byte uimm0 reads dereference cast pointers at
 * arbitrary offsets -- presumably safe on the supported little-endian
 * targets, but technically unaligned/type-punned access; confirm. */
static void imm_scanner(xed_decoded_inst_t* d)
{
    xed_uint8_t imm_bytes;
    xed_uint8_t imm1_bytes;
    xed_uint8_t max_bytes = xed3_operand_get_max_bytes(d);
    unsigned char length = xed_decoded_inst_get_length(d);
    unsigned int pos_imm = 0;
    const xed_uint8_t* itext = d->_byte_array._dec;
    const xed_uint8_t* imm_ptr = 0;

    set_imm_bytes(d);
#if defined(XED_AMD_ENABLED)
    if (xed3_operand_get_amd3dnow(d)) {
        if (length < max_bytes) {
            /*opcode is in immediate*/
            xed3_operand_set_nominal_opcode(d,
                xed_decoded_inst_get_byte(d, length));
            /*count the pseudo immediate byte, which is opcode*/
            xed_decoded_inst_inc_length(d);
            /*imm_bytes == imm_bytes1 == 0 for amd3dnow */
            return;
        }
        else {
            too_short(d);
            return;
        }
    }
#endif

    imm_bytes = bits2bytes(xed3_operand_get_imm_width(d));
    imm1_bytes = xed3_operand_get_imm1_bytes(d);

    if (imm_bytes) {
        if (length + imm_bytes <= max_bytes) {
            xed3_operand_set_pos_imm(d, length);
            /* eat imm */
            length += imm_bytes;
            xed_decoded_inst_set_length(d, length);

            /* a second immediate (e.g. ENTER) follows the first */
            if (imm1_bytes) {
                if (length + imm1_bytes <= max_bytes) {
                    xed3_operand_set_pos_imm1(d, length);
                    imm_ptr = itext + length;
                    length += imm1_bytes; /* eat imm1 */
                    xed_decoded_inst_set_length(d, length);
                    //set uimm1 value
                    xed3_operand_set_uimm1(d, *imm_ptr);
                }
                else {/* Ugly code */
                    too_short(d);
                    return;
                }
            }
        }
        else {
            too_short(d);
            return;
        }
    }

    /* FIXME: setting UIMM chunks. This can be done better,
     * for example special capturing function in ILD, like for imm_bytes*/
    pos_imm = xed3_operand_get_pos_imm(d);
    imm_ptr = itext + pos_imm;
    switch(imm_bytes){
      case 0:
        break;
      case 1: {
          xed_uint8_t uimm0 = *(xed_uint8_t*)(imm_ptr);
          xed3_operand_set_uimm0(d, uimm0);

          //for SE_IMM8() we need to set here the esrc as well
          xed3_operand_set_esrc(d,uimm0 >> 4);
          break;
      }
      case 2:{
          xed_uint16_t uimm0 = *(xed_uint16_t*)(imm_ptr);
          xed3_operand_set_uimm0(d, uimm0);
          break;
      }
      case 4:{
          xed_uint32_t uimm0 = *(xed_uint32_t*)(imm_ptr);
          xed3_operand_set_uimm0(d, uimm0);
          break;
      }
      case 8:{
          xed_uint64_t uimm0 = *(xed_uint64_t*)(imm_ptr);
          xed3_operand_set_uimm0(d, uimm0);
          break;
      }
      default:
        /*Unexpected immediate width, this should never happen*/
        xed_assert(0);
    }

    /* uimm1 is set earlier */
}
1139 #endif // !defined(XED_SUPPORTS_AVX512) && !defined(XED_SUPPORTS_KNC)
1140 ////////////////////////////////////////////////////////////////////////////////
1141
1142 #if defined(XED_AVX)
catch_invalid_rex_or_legacy_prefixes(xed_decoded_inst_t * d)1143 static void catch_invalid_rex_or_legacy_prefixes(xed_decoded_inst_t* d)
1144 {
1145 // REX, F2, F3, 66 are not allowed before VEX or EVEX prefixes
1146 if ( xed3_mode_64b(d) && xed3_operand_get_rex(d) )
1147 xed3_operand_set_error(d,XED_ERROR_BAD_REX_PREFIX);
1148 else if ( xed3_operand_get_osz(d) ||
1149 xed3_operand_get_ild_f3(d) ||
1150 xed3_operand_get_ild_f2(d) )
1151 xed3_operand_set_error(d,XED_ERROR_BAD_LEGACY_PREFIX);
1152 }
catch_invalid_mode(xed_decoded_inst_t * d)1153 static void catch_invalid_mode(xed_decoded_inst_t* d)
1154 {
1155 // we know we have VEX or EVEX instr.
1156 if(xed3_operand_get_realmode(d)) {
1157 xed3_operand_set_error(d,XED_ERROR_INVALID_MODE);
1158 }
1159 }
1160
evex_vex_opcode_scanner(xed_decoded_inst_t * d)1161 static void evex_vex_opcode_scanner(xed_decoded_inst_t* d)
1162 {
1163 /* no need to check max_bytes here, it was checked in previous
1164 scanner */
1165 unsigned char length = xed_decoded_inst_get_length(d);
1166 xed_uint8_t b = xed_decoded_inst_get_byte(d, length);
1167 xed3_operand_set_nominal_opcode(d, b);
1168 xed3_operand_set_pos_nominal_opcode(d, length);
1169 xed_decoded_inst_inc_length(d);
1170 catch_invalid_rex_or_legacy_prefixes(d);
1171 catch_invalid_mode(d);
1172 }
1173 #endif
1174
opcode_scanner(xed_decoded_inst_t * d)1175 static void opcode_scanner(xed_decoded_inst_t* d)
1176 {
1177 unsigned char length = xed_decoded_inst_get_length(d);
1178 xed_uint8_t b = xed_decoded_inst_get_byte(d, length);
1179 xed_uint8_t opcode = 0;
1180
1181 /*no need to check max_bytes - it was checked in previous scanners*/
1182
1183 /* no need to check for VEX here anymore, because if VEX
1184 prefix was encountered, we would get to evex_vex_opcode_scanner */
1185 if (b != 0x0F) {
1186 xed3_operand_set_map(d, XED_ILD_MAP0);
1187 xed3_operand_set_nominal_opcode(d, b);
1188 xed3_operand_set_pos_nominal_opcode(d, length);
1189 xed_decoded_inst_inc_length(d);
1190 goto out;
1191 }
1192
1193 length++; /* eat the 0x0F */
1194 xed3_operand_set_pos_nominal_opcode(d, length);
1195
1196 /* 0x0F opcodes MAPS 1,2,3 */
1197 //FIXME: finish here
1198 if (length < xed3_operand_get_max_bytes(d)) {
1199 xed_uint8_t m = xed_decoded_inst_get_byte(d, length);
1200 if (m == 0x38) {
1201 length++; /* eat the 0x38 */
1202 xed3_operand_set_map(d, XED_ILD_MAP2);
1203 xed_decoded_inst_set_length(d, length);
1204 get_next_as_opcode( d);
1205 return;
1206 }
1207 else if (m == 0x3A) {
1208 length++; /* eat the 0x3A */
1209 xed3_operand_set_map(d, XED_ILD_MAP3);
1210 xed_decoded_inst_set_length(d, length);
1211 xed3_operand_set_imm_width(d, bytes2bits(1));
1212 get_next_as_opcode( d);
1213 return;
1214 }
1215 else if (m == 0x3B) {
1216 length++; /* eat the 0x3B */
1217 bad_map(d);
1218 xed_decoded_inst_set_length(d, length);
1219 get_next_as_opcode( d);
1220 return;
1221 //FIXME: TNI maps have no modrm, imm, disp ??
1222 /* BTW we use MAP as index to static decoding lookup tables..
1223 * with INVALID_MAP we will have segv there, need to check
1224 * for it after ILD phase.
1225 * Maybe set some common ILD_INVALID member to indicate that
1226 * there is no need to do static decoding?
1227 */
1228 }
1229 else if (m > 0x38 && m <= 0x3F) {
1230 length++; /* eat the 0x39...0x3F (minus 3A and 3B) */
1231 bad_map(d);
1232
1233 xed_decoded_inst_set_length(d, length);
1234 get_next_as_opcode( d);
1235 return; //FIXME: TNI maps have no modrm, imm, disp ??
1236 /* BTW we use MAP as index static decoding lookup tables..
1237 * with INVALID_MAP we will have segv there, need to check
1238 * for it after ILD phase */
1239 }
1240 #if defined(XED_AMD_ENABLED)
1241 else if (m == 0x0F) {
1242 xed3_operand_set_amd3dnow(d, 1);
1243 /* opcode is in immediate later on */
1244 length++; /*eat the second 0F */
1245 xed3_operand_set_nominal_opcode(d, 0x0F);
1246 /*special map for amd3dnow */
1247 xed3_operand_set_map(d, XED_ILD_MAPAMD);
1248 xed_decoded_inst_set_length(d, length);
1249 }
1250 #endif
1251 else {
1252 length++; /* eat the 2nd opcode byte */
1253 xed3_operand_set_nominal_opcode(d, m);
1254 xed3_operand_set_map(d, XED_ILD_MAP1);
1255 xed_decoded_inst_set_length(d, length);
1256 }
1257 }
1258 else{
1259 too_short(d);
1260 return;
1261 }
1262
1263 out:
1264 //set SRM (partial opcode instructions need it)
1265 opcode = xed3_operand_get_nominal_opcode(d);
1266 xed3_operand_set_srm(d, xed_modrm_rm(opcode));
1267 }
1268
1269 //////////////////////////////////////////////////////////////////////////
1270 // KNC/AVX512 EVEX and EVEX-IMM8 scanners
1271
1272
1273
1274 #if defined(XED_SUPPORTS_AVX512) || defined(XED_SUPPORTS_KNC)
1275
typedef union { // Common KNC & AVX512
    /* First EVEX payload byte (the byte after 0x62): opcode map plus
       the inverted register-extension bits (consumed, complemented, in
       evex_scanner). */
    struct {
        xed_uint32_t map:4;     /* opcode map selector */
        xed_uint32_t rr_inv:1;  /* inverted R' bit */
        xed_uint32_t b_inv:1;   /* inverted B bit */
        xed_uint32_t x_inv:1;   /* inverted X bit */
        xed_uint32_t r_inv:1;   /* inverted R bit */
        xed_uint32_t pad:24;    /* unused; byte occupies low 8 bits */
    } s;
    xed_uint32_t u32;           /* raw byte value, viewed as u32 */
} xed_avx512_payload1_t;
1287
typedef union { // Common KNC & AVX512
    /* Second EVEX payload byte: compressed legacy prefix (pp), the U
       bit that distinguishes AVX512 (U=1) from KNC (U=0), the vvvv
       destination specifier, and REX.W. */
    struct {
        xed_uint32_t pp:2;          /* compressed prefix: recoded via
                                       vex_prefix_recoding[] */
        xed_uint32_t ubit:1;        /* U: 1 => AVX512 EVEX, 0 => KNC */
        xed_uint32_t vexdest210:3;  /* low 3 bits of vvvv (inverted form) */
        xed_uint32_t vexdest3:1;    /* bit 3 of vvvv */
        xed_uint32_t rexw:1;        /* REX.W */
        xed_uint32_t pad:24;        /* unused; byte occupies low 8 bits */
    } s;
    xed_uint32_t u32;               /* raw byte value, viewed as u32 */
} xed_avx512_payload2_t;
1299
typedef union{ // KNC only
    /* Third EVEX payload byte, KNC (U=0) layout: mask register, the
       inverted vvvv bit 4, swizzle/conversion selector and no-rounding
       bit. */
    struct {
        xed_uint32_t mask:3;        /* opmask register specifier */
        xed_uint32_t vexdest4p:1;   /* inverted bit 4 of vvvv */
        xed_uint32_t swiz:3;        /* swizzle/conversion field */
        xed_uint32_t nr:1;          /* non-rounding hint bit */
        xed_uint32_t pad:24;        /* unused; byte occupies low 8 bits */
    } s;
    xed_uint32_t u32;               /* raw byte value, viewed as u32 */
} xed_knc_payload3_t;
1310
1311
typedef union{ // AVX512 only
    /* Third EVEX payload byte, AVX512 (U=1) layout: mask register, the
       inverted vvvv bit 4, broadcast/rounding control, vector length /
       rounding selector and zeroing bit. */
    struct {
        xed_uint32_t mask:3;        /* opmask register specifier */
        xed_uint32_t vexdest4p:1;   /* inverted bit 4 of vvvv */
        xed_uint32_t bcrc:1;        /* broadcast / rounding-control bit */
        xed_uint32_t llrc:2;        /* vector length or rounding control */
        xed_uint32_t z:1;           /* zeroing (vs merging) masking */
        xed_uint32_t pad:24;        /* unused; byte occupies low 8 bits */
    } s;
    xed_uint32_t u32;               /* raw byte value, viewed as u32 */
} xed_avx512_payload3_t;
1323
1324
evex_scanner(xed_decoded_inst_t * d)1325 static void evex_scanner(xed_decoded_inst_t* d)
1326 {
1327 /* assumption: length < max_bytes
1328 * This is checked in prefix_scanner.
1329 * If any other scanner is added before evex_scanner, this condition
1330 * should be preserved.
1331 * FIXME: check length < max_bytes here anyway? This will be less
1332 * error-prone, but that's an additional non-necessary branch.
1333 */
1334 xed_uint8_t max_bytes = xed3_operand_get_max_bytes(d);
1335 unsigned char length = xed_decoded_inst_get_length(d);
1336 xed_uint8_t b = xed_decoded_inst_get_byte(d, length);
1337
1338 if (b == 0x62)
1339 {
1340 /*first check that it is not a BOUND instruction */
1341 if(!xed3_mode_64b(d)) {
1342 /*make sure we can read one additional byte */
1343 if (length + 1 < max_bytes) {
1344 xed_uint8_t n = xed_decoded_inst_get_byte(d, length+1);
1345 if ((n&0xC0) != 0xC0) {
1346 /*this is a BOUND instruction */
1347 /* FIXME: could have set opcode here and call
1348 * modrm_scanner but that would be a code
1349 * duplication */
1350 return;
1351 }
1352 }
1353 else {
1354 too_short(d);
1355 return;
1356 }
1357 }
1358 /*Unlike the vex and xop prefix scanners, here length is pointing
1359 at the evex prefix byte. We want to ensure that we have enough
1360 bytes available to read 4 bytes for evex prefix and 1 byte for an
1361 opcode */
1362 if (length + 4 < max_bytes) {
1363 xed_avx512_payload1_t evex1;
1364 xed_avx512_payload2_t evex2;
1365
1366 evex1.u32 = xed_decoded_inst_get_byte(d, length+1);
1367 evex2.u32 = xed_decoded_inst_get_byte(d, length+2);
1368
1369 // above check guarantees that r and x are 1 in 16/32b mode.
1370 if (xed3_mode_64b(d)) {
1371 xed3_operand_set_rexr(d, ~evex1.s.r_inv&1);
1372 xed3_operand_set_rexx(d, ~evex1.s.x_inv&1);
1373 xed3_operand_set_rexb(d, ~evex1.s.b_inv&1);
1374 xed3_operand_set_rexrr(d, ~evex1.s.rr_inv&1);
1375 }
1376
1377 xed3_operand_set_map(d, evex1.s.map);
1378
1379 xed3_operand_set_rexw(d, evex2.s.rexw);
1380 xed3_operand_set_vexdest3(d, evex2.s.vexdest3);
1381 xed3_operand_set_vexdest210(d, evex2.s.vexdest210);
1382 xed3_operand_set_ubit(d, evex2.s.ubit);
1383 if (evex2.s.ubit)
1384 xed3_operand_set_vexvalid(d, 2); // AVX512 EVEX U=1 req'd
1385 else
1386 {
1387 #if defined(XED_SUPPORTS_KNC)
1388 xed3_operand_set_vexvalid(d, 4); // KNC EVEX U=0 req'd
1389 #else
1390 xed3_operand_set_error(d,XED_ERROR_BAD_EVEX_UBIT);
1391 #endif
1392 }
1393
1394 xed3_operand_set_vex_prefix(d,vex_prefix_recoding[evex2.s.pp]);
1395
1396 if (evex1.s.map == XED_ILD_MAP3)
1397 xed3_operand_set_imm_width(d, bytes2bits(1));
1398
1399 if (evex2.s.ubit) // AVX512 only (Not KNC)
1400 {
1401 #if defined(XED_SUPPORTS_AVX512)
1402 xed_avx512_payload3_t evex3;
1403 evex3.u32 = xed_decoded_inst_get_byte(d, length+3);
1404
1405 xed3_operand_set_zeroing(d, evex3.s.z);
1406
1407 // llrc is still required for rounding fixup much later
1408 // during decode.
1409 xed3_operand_set_llrc(d, evex3.s.llrc);
1410
1411 xed3_operand_set_vl(d, evex3.s.llrc);
1412 xed3_operand_set_bcrc(d, evex3.s.bcrc);
1413 xed3_operand_set_vexdest4(d, ~evex3.s.vexdest4p&1);
1414 if (!xed3_mode_64b(d) && evex3.s.vexdest4p==0)
1415 bad_v4(d);
1416
1417 xed3_operand_set_mask(d, evex3.s.mask);
1418 if (evex3.s.mask == 0 && evex3.s.z == 1)
1419 bad_z_aaa(d);
1420 #endif
1421 }
1422 #if defined(XED_SUPPORTS_KNC)
1423 else // KNC
1424 {
1425 const xed_uint_t vl_512=2;
1426 xed_knc_payload3_t evex3;
1427 evex3.u32 = xed_decoded_inst_get_byte(d, length+3);
1428 xed3_operand_set_vl(d, vl_512); //Indicates vector length 512b
1429
1430 xed3_operand_set_nr(d, evex3.s.nr);
1431 xed3_operand_set_swiz(d, evex3.s.swiz);
1432 xed3_operand_set_vexdest4(d, ~evex3.s.vexdest4p&1);
1433 xed3_operand_set_mask(d, evex3.s.mask);
1434 }
1435 #endif
1436
1437 length += 4;
1438 xed_decoded_inst_set_length(d, length);
1439 /* vex opcode scanner fits for evex instructions too: it just reads
1440 * one byte as nominal opcode, this is exactly what we want for
1441 * evex*/
1442 evex_vex_opcode_scanner(d);
1443 }
1444 else {
1445 /*there is no enough bytes, hence we are out of bytes */
1446 too_short(d);
1447 }
1448 }
1449 }
1450
evex_imm_scanner(xed_decoded_inst_t * d)1451 static void evex_imm_scanner(xed_decoded_inst_t* d)
1452 {
1453 xed_uint8_t imm_bytes;
1454 xed_uint8_t imm1_bytes;
1455 xed_uint8_t max_bytes = xed3_operand_get_max_bytes(d);
1456 unsigned char length = xed_decoded_inst_get_length(d);
1457 unsigned int pos_imm = 0;
1458 const xed_uint8_t* itext = d->_byte_array._dec;
1459 const xed_uint8_t* imm_ptr = 0;
1460
1461 set_imm_bytes(d);
1462
1463 #if defined(XED_AMD_ENABLED)
1464 if (xed3_operand_get_amd3dnow(d)) {
1465 if (length < max_bytes) {
1466 /*opcode is in immediate*/
1467 xed3_operand_set_nominal_opcode(d,
1468 xed_decoded_inst_get_byte(d, length));
1469 /*count the pseudo immediate byte, which is opcode*/
1470 xed_decoded_inst_inc_length(d);
1471 /*imm_bytes == imm_bytes1 == 0 for amd3dnow */
1472 return;
1473 }
1474 else {
1475 too_short(d);
1476 return;
1477 }
1478 }
1479 #endif
1480
1481 imm_bytes = bits2bytes(xed3_operand_get_imm_width(d));
1482 imm1_bytes = xed3_operand_get_imm1_bytes(d);
1483
1484 if (imm_bytes) {
1485 if (length + imm_bytes <= max_bytes) {
1486 xed3_operand_set_pos_imm(d, length);
1487 /* eat imm */
1488 length += imm_bytes;
1489 xed_decoded_inst_set_length(d, length);
1490
1491 if (imm1_bytes) {
1492 if (length + imm1_bytes <= max_bytes) {
1493 xed3_operand_set_pos_imm1(d, length);
1494 imm_ptr = itext + length;
1495 length += imm1_bytes; /* eat imm1 */
1496 xed_decoded_inst_set_length(d, length);
1497 //set uimm1 value
1498 xed3_operand_set_uimm1(d, *imm_ptr);
1499 }
1500 else {/* Ugly code */
1501 too_short(d);
1502 return;
1503 }
1504 }
1505 }
1506 else {
1507 too_short(d);
1508 return;
1509 }
1510 }
1511
1512 /* FIXME: setting UIMM chunks. This can be done better,
1513 * for example special capturing function in ILD, like for imm_bytes*/
1514 pos_imm = xed3_operand_get_pos_imm(d);
1515 imm_ptr = itext + pos_imm;
1516 switch(imm_bytes)
1517 {
1518 case 0:
1519 break;
1520 case 1:
1521 {
1522 xed_uint8_t uimm0 = *imm_ptr;
1523 xed_uint8_t esrc = uimm0 >> 4;
1524
1525 xed3_operand_set_uimm0(d, uimm0);
1526 xed3_operand_set_esrc(d, esrc);
1527 break;
1528 }
1529 case 2:
1530 xed3_operand_set_uimm0(d, *(xed_uint16_t*)imm_ptr);
1531 break;
1532 case 4:
1533 xed3_operand_set_uimm0(d, *(xed_uint32_t*)imm_ptr);
1534 break;
1535 case 8:
1536 xed3_operand_set_uimm0(d, *(xed_uint64_t*)imm_ptr);
1537 break;
1538 default:
1539 /*Unexpected immediate width, this should never happen*/
1540 xed_assert(0);
1541 }
1542
1543 /* uimm1 is set earlier */
1544 }
1545
1546 #endif // defined(XED_SUPPORTS_AVX512)
1547
1548 ////////////////////////////////////////////////////////////////////////////////
1549
xed_ild_lookup_init(void)1550 void xed_ild_lookup_init(void) {
1551 xed_ild_eosz_init();
1552 xed_ild_easz_init();
1553
1554 xed_ild_imm_l3_init();
1555 xed_ild_disp_l3_init();
1556
1557 init_has_disp_regular_table();
1558 init_eamode_table();
1559 init_has_sib_table();
1560
1561 }
1562
xed_ild_init(void)1563 void xed_ild_init(void) {
1564 init_prefix_table();
1565 xed_ild_lookup_init();
1566 }
1567
1568
1569
1570
/* Run the full instruction-length-decode pipeline over the byte array
   attached to ild: prefixes, VEX/EVEX prefixes, opcode, modrm, sib,
   displacement and immediate. Each stage may set out_of_bytes, which
   aborts the remaining stages. */
void
xed_instruction_length_decode(xed_decoded_inst_t* ild)
{
    prefix_scanner(ild);
#if defined(XED_AVX)
    if (xed3_operand_get_out_of_bytes(ild))
        return;
    vex_scanner(ild);
#endif
#if defined(XED_SUPPORTS_AVX512) || defined(XED_SUPPORTS_KNC)
    // if we got a vex prefix (which also sucks down the opcode),
    // then we do not need to scan for evex prefixes.
    if (!xed3_operand_get_vexvalid(ild)) {
        if (xed3_operand_get_out_of_bytes(ild))
            return;
        evex_scanner(ild);
    }
#endif

    if (xed3_operand_get_out_of_bytes(ild))
        return;
#if defined(XED_AVX)
    // vex/xop prefixes also eat the vex/xop opcode
    if (!xed3_operand_get_vexvalid(ild))
        opcode_scanner(ild);
#else
    opcode_scanner(ild);
#endif
    modrm_scanner(ild);
    sib_scanner(ild);
    disp_scanner(ild);
#if defined(XED_SUPPORTS_AVX512) || defined(XED_SUPPORTS_KNC)
    evex_imm_scanner(ild);
#else
    imm_scanner(ild);
#endif
}
1608
1609 #include "xed-chip-modes.h"
1610
1611 /// This is the second main entry point for the decoder
1612 /// used for new xed3 decoding.
1613 XED_DLL_EXPORT xed_error_enum_t
xed_ild_decode(xed_decoded_inst_t * xedd,const xed_uint8_t * itext,const unsigned int bytes)1614 xed_ild_decode(xed_decoded_inst_t* xedd,
1615 const xed_uint8_t* itext,
1616 const unsigned int bytes)
1617 {
1618 xed_uint_t tbytes;
1619 xed_chip_enum_t chip = xed_decoded_inst_get_input_chip(xedd);
1620
1621 set_chip_modes(xedd,chip,0); //FIXME: add support for cpuid features
1622
1623 xedd->_byte_array._dec = itext;
1624
1625 tbytes = bytes;
1626 if (bytes > XED_MAX_INSTRUCTION_BYTES)
1627 tbytes = XED_MAX_INSTRUCTION_BYTES;
1628 xed3_operand_set_max_bytes(xedd, tbytes);
1629 xed_instruction_length_decode(xedd);
1630
1631 if (xed3_operand_get_out_of_bytes(xedd))
1632 return XED_ERROR_BUFFER_TOO_SHORT;
1633 if (xed3_operand_get_map(xedd) == XED_ILD_MAP_INVALID)
1634 return XED_ERROR_GENERAL_ERROR;
1635
1636 return XED_ERROR_NONE;
1637 }
1638
1639
1640 // xed-ild-private.h
1641 xed_bits_t
xed_ild_cvt_mode(xed_machine_mode_enum_t mmode)1642 xed_ild_cvt_mode(xed_machine_mode_enum_t mmode) {
1643
1644 xed_bits_t result = 0;
1645 switch(mmode)
1646 {
1647 case XED_MACHINE_MODE_LONG_64:
1648 result = XED_GRAMMAR_MODE_64;
1649
1650 break;
1651 case XED_MACHINE_MODE_LEGACY_32:
1652 case XED_MACHINE_MODE_LONG_COMPAT_32:
1653 result = XED_GRAMMAR_MODE_32;
1654 break;
1655
1656 case XED_MACHINE_MODE_REAL_16:
1657 case XED_MACHINE_MODE_LEGACY_16:
1658 case XED_MACHINE_MODE_LONG_COMPAT_16:
1659 result = XED_GRAMMAR_MODE_16;
1660 break;
1661 default:
1662 xed_derror("Bad machine mode in xed_ild_cvt_mode() call");
1663 }
1664 return result;
1665 }
1666
1667