1 /*BEGIN_LEGAL
2 
3 Copyright (c) 2018 Intel Corporation
4 
5   Licensed under the Apache License, Version 2.0 (the "License");
6   you may not use this file except in compliance with the License.
7   You may obtain a copy of the License at
8 
9       http://www.apache.org/licenses/LICENSE-2.0
10 
11   Unless required by applicable law or agreed to in writing, software
12   distributed under the License is distributed on an "AS IS" BASIS,
13   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   See the License for the specific language governing permissions and
15   limitations under the License.
16 
17 END_LEGAL */
18 /// @file xed-ild.c
19 /// instruction length decoder
20 
21 /*
22   FIXME:
23 
24   need these opcode/mode/prefix based tables:
25     has_modrm (boolean)
26     disp_bytes = 1,2,4,8 bytes
27     imm_bytes = 1,2,4,8 bytes
28 
29     >90% instructions have MODRM.
30 
31         Key on the MOD pattern prebinding
32 
33     >90% of the displacements come from the MODRM.MOD byte processing.
34 
35         Some come from the pattern:
36         Nonterminals: BRDISP8, BRDISP32, MEMDISPv, BRDISPz
37 
    >90% of the immediates are 1B and come from using map3.
39 
40         xed grammar has UIMM32, UIMM16, UIMM8,UIMM8_1, SIMM8, SIMMz. The
41         signed/unsigned should move to an attribute or the xed_inst_t.  The
42         UIMM32 is used on AMD XOP instructions.
43 
44         uimm16: opcodes 9A, C2, C8, CA, EA
45 
46  */
47 
48     /* FIXME:we might have invalid map (TNI maps) - in this case
49      * we should check for it before looking up in tables for
50      * modrm/imm/disp/static decoding
51      * Also invalid map value cannot be 0xFF - we allocate 3 bits
52      * for MAP operand in key for static lookup.
53      */
54 
55 #include "xed-internal-header.h"
56 #include "xed-ild.h"
57 #include "xed-util-private.h"
58 #include <string.h> // strcmp
59 
60 #include "xed-ild-modrm.h"
61 #include "xed-ild-disp-bytes.h"
62 #include "xed-ild-imm-bytes.h"
63 #include "xed-operand-accessors.h"
64 
65 
66 
xed3_mode_64b(xed_decoded_inst_t * d)67 static XED_INLINE int xed3_mode_64b(xed_decoded_inst_t* d) {
68     return (xed3_operand_get_mode(d) == XED_GRAMMAR_MODE_64);
69 }
70 
71 /*
72  * The scanners cannot return arbitrarily. They MUST return by calling the
73  * next scanner.
74  */
75 
76 
77 static void init_has_disp_regular_table(void);
78 static void init_eamode_table(void);
79 static void init_has_sib_table(void);
80 static void set_has_modrm(xed_decoded_inst_t* d);
81 
82 
83 
// Record the branch-hint operand implied by a 0x2E or 0x3E prefix byte.
// The caller guarantees b is one of those two values.
static void set_hint(xed_uint8_t b,  xed_decoded_inst_t* d){
    if (b == 0x2e) {
        xed3_operand_set_hint(d, 1);
    }
    else if (b == 0x3e) {
        xed3_operand_set_hint(d, 2);
    }
    else {
        xed_assert(0);  // unreachable by contract
    }
}
96 
97 // conservative filter table for fast prefix checking
98 // 2014-07-30:
99 // Timing perftest: 2-6% gain
100 // Without the filter:
101 //   Average: 384.08s  Minimum: 352.74s
102 // With the filter:
103 //   Average: 362.97s  Minimum: 346.26s
104 // Could use 2x the space and the 64b mode thing to pick the right table.
105 // That would speed up 32b prefix decodes.
106 
107 #define XED_PREFIX_TABLE_SIZE 8
108 static xed_uint32_t prefix_table[XED_PREFIX_TABLE_SIZE];  // 32B=256b 32*8=2^5*2^3
109 
// Mark byte value 'a' as a potential prefix in the 256-bit filter table.
static void set_prefix_table_bit(xed_uint8_t a)
{
    xed_uint32_t word_index = a >> 5;    // which 32b word holds bit 'a'
    xed_uint32_t bit_index  = a & 0x1F;  // bit position within that word
    prefix_table[word_index] |= (1u << bit_index);
}
116 
get_prefix_table_bit(xed_uint8_t a)117 static XED_INLINE xed_uint_t get_prefix_table_bit(xed_uint8_t a)
118 {
119     // return 1 if the bit is set in the table
120     xed_uint32_t x = a >> 5;
121     xed_uint32_t y = a & 0x1F;
122     return (prefix_table[x] >> y ) & 1;
123 }
124 
125 static void init_prefix_table(void);
init_prefix_table(void)126 static void init_prefix_table(void)
127 {
128     int i;
129     static xed_uint8_t legacy_prefixes[] = {
130         0xF0, // lock
131         0x66, // osz
132         0x67, // asz
133 
134         0xF2, 0xF3, // rep/repne
135 
136         0x2E, 0x3E, // 6 segment prefixes
137         0x26, 0x36,
138         0x64, 0x65,
139 
140         0 // sentinel
141     };
142 
143     for (i=0;i<XED_PREFIX_TABLE_SIZE;i++)
144         prefix_table[i]=0;
145 
146     for (i=0;legacy_prefixes[i];i++)
147         set_prefix_table_bit(legacy_prefixes[i]);
148 
149     // add the rex prefixes even for 32b mode
150     for(i=0x40;i<0x50;i++)
151         set_prefix_table_bit(XED_CAST(xed_uint8_t,i));
152 }
153 
too_short(xed_decoded_inst_t * d)154 static void XED_NOINLINE too_short(xed_decoded_inst_t* d)
155 {
156     xed3_operand_set_out_of_bytes(d, 1);
157     if ( xed3_operand_get_max_bytes(d) >= XED_MAX_INSTRUCTION_BYTES)
158         xed3_operand_set_error(d,XED_ERROR_INSTR_TOO_LONG);
159     else
160         xed3_operand_set_error(d,XED_ERROR_BUFFER_TOO_SHORT);
161 }
162 
bad_map(xed_decoded_inst_t * d)163 static void XED_NOINLINE bad_map(xed_decoded_inst_t* d)
164 {
165     xed3_operand_set_map(d,XED_ILD_MAP_INVALID);
166     xed3_operand_set_error(d,XED_ERROR_BAD_MAP);
167 }
168 
169 #if defined(XED_SUPPORTS_AVX512)
// Record an EVEX encoding whose V' (v4) bit is invalid for this form.
static void XED_NOINLINE bad_v4(xed_decoded_inst_t* d)
{
    xed3_operand_set_error(d,XED_ERROR_BAD_EVEX_V_PRIME);
}
// Record an EVEX encoding that requests zeroing (z) without masking (aaa=0).
static void XED_NOINLINE bad_z_aaa(xed_decoded_inst_t* d)
{
    xed3_operand_set_error(d,XED_ERROR_BAD_EVEX_Z_NO_MASKING);
}
178 #endif
179 
/* Consume all legacy and REX prefixes at the start of the instruction,
 * recording prefix-derived operands (osz/asz, segment override, lock,
 * rep/repne, REX bits) and the prefix counts. Only a REX that is the
 * last prefix before the opcode takes effect (rex is reset to 0 whenever
 * a legacy prefix follows it). Advances the decode length past the
 * prefixes; calls too_short() if prefixes exhaust the buffer. */
static void prefix_scanner(xed_decoded_inst_t* d)
{
    xed_uint8_t max_bytes = xed3_operand_get_max_bytes(d);
    unsigned char length = xed_decoded_inst_get_length(d);
    xed_uint8_t nprefixes = 0;
    xed_uint8_t nseg_prefixes = 0;
    xed_uint8_t nrexes = 0;
    unsigned char rex = 0;   // most recent REX byte, cleared by any legacy prefix

    while(length < max_bytes)
    {
        xed_uint8_t b = xed_decoded_inst_get_byte(d, length);

        // fast check to see if something might be a prefix
        // includes REX prefixes in 32b mode
        if (get_prefix_table_bit(b)==0)
            goto out;

        switch(b) {
          case 0x66:
            xed3_operand_set_osz(d, 1);
            xed3_operand_set_prefix66(d, 1);
            /*ignore possible REX prefix encountered earlier */
            rex = 0;
            break;

          case 0x67:
            xed3_operand_set_asz(d, 1);
            rex = 0;
            break;

          /* segment prefixes */
          case 0x2E:
          case 0x3E:
            set_hint(b,d);
            //INTENTIONAL FALLTHROUGH
          case 0x26:
          case 0x36:
            // CS/DS/ES/SS overrides are ignored as segments in 64b mode
            if (xed3_mode_64b(d)==0)
                xed3_operand_set_ild_seg(d, b);
            nseg_prefixes++;
            /*ignore possible REX prefix encountered earlier */
            rex = 0;

            break;
          case 0x64:
          case 0x65:
            //for 64b mode we are ignoring non valid segment prefixes
            //only FS=0x64 and GS=0x65 are valid for 64b mode
            xed3_operand_set_ild_seg(d, b);

            nseg_prefixes++;
            /*ignore possible REX prefix encountered earlier */
            rex = 0;
            break;

          case 0xF0:
            xed3_operand_set_lock(d, 1);
            rex = 0;
            break;

          case 0xF3:
            // track both the first and last F2/F3 seen; which one wins is
            // decided below by MODE_FIRST_PREFIX
            xed3_operand_set_ild_f3(d, 1);
            xed3_operand_set_last_f2f3(d, 3);
            if(xed3_operand_get_first_f2f3(d) == 0)
                xed3_operand_set_first_f2f3(d, 3);

            rex = 0;
            break;

          case 0xF2:
            xed3_operand_set_ild_f2(d, 1);
            xed3_operand_set_last_f2f3(d, 2);
            if(xed3_operand_get_first_f2f3(d) == 0)
                xed3_operand_set_first_f2f3(d, 2);

            rex = 0;
            break;

          default:
             /*Take care of REX prefix */
            if (xed3_mode_64b(d)  &&
                (b & 0xf0) == 0x40) {
                    nrexes++;
                    rex = b;
            }
            else
                goto out;  // not a prefix after all; opcode starts here
        }
        length++;
        nprefixes++;
    }
out:
    //set counts
    xed_decoded_inst_set_length(d, length);
    xed3_operand_set_nprefixes(d, nprefixes);
    xed3_operand_set_nseg_prefixes(d, nseg_prefixes);
    xed3_operand_set_nrexes(d, nrexes);

    //set REX, REXW, etc. (only if REX was the last prefix seen)
    if (rex) {
        xed3_operand_set_rexw(d, (rex>>3) & 1);
        xed3_operand_set_rexr(d, (rex>>2) & 1);
        xed3_operand_set_rexx(d, (rex>>1) & 1);
        xed3_operand_set_rexb(d,  (rex) & 1);
        xed3_operand_set_rex(d, 1);
    }

    //set REP and REFINING: pick first or last F2/F3 per decode mode knob
    if (xed3_operand_get_mode_first_prefix(d))
        xed3_operand_set_rep(d, xed3_operand_get_first_f2f3(d));
    else
        xed3_operand_set_rep(d, xed3_operand_get_last_f2f3(d));

    //set SEG_OVD: recode the raw segment prefix byte to the 1..6 ordinal
    /*FIXME: lookup table for seg_ovd ? */
    /*FIXME: make the grammar use the raw byte value instead of the 1..6
     * recoding */
    switch(xed3_operand_get_ild_seg(d)) {
    case 0x2e:
        xed3_operand_set_seg_ovd(d, 1);
        break;
    case 0x3e:
        xed3_operand_set_seg_ovd(d, 2);
        break;
    case 0x26:
        xed3_operand_set_seg_ovd(d, 3);
        break;
    case 0x64:
        xed3_operand_set_seg_ovd(d, 4);
        break;
    case 0x65:
        xed3_operand_set_seg_ovd(d, 5);
        break;
    case 0x36:
        xed3_operand_set_seg_ovd(d, 6);
        break;
    default:
        break;
    }

    //check max bytes
    if (length >= max_bytes) {
        /* all available length was taken by prefixes, but we for sure need
         * at least one additional byte for an opcode, hence we are out of
         * bytes.         */
        too_short(d);
        return;
    }
}
330 
331 #if defined(XED_AVX) || defined(XED_SUPPORTS_KNC)
332 //VEX_PREFIX use 2 as F2 and 3 as F3 so table is required.
333 static unsigned int vex_prefix_recoding[/*pp*/] = { 0,1,3,2 };
334 #endif
335 
336 #if defined(XED_AVX)
337 
// Bitfield overlays for the VEX prefix payload bytes. Each union lets the
// scanners load one raw byte into .u32 and then pick fields out of .s.
// Field names with _inv are stored inverted in the encoding (1 means "bit
// clear" architecturally); the scanners complement them on extraction.

typedef union { // C4 payload 1
    struct {
        xed_uint32_t map:5;    // opcode map selector (low 5 bits)
        xed_uint32_t b_inv:1;  // inverted REX.B extension
        xed_uint32_t x_inv:1;  // inverted REX.X extension
        xed_uint32_t r_inv:1;  // inverted REX.R extension
        xed_uint32_t pad:24;
    } s;
    xed_uint32_t u32;
} xed_avx_c4_payload1_t;

typedef union { // C4 payload 2
    struct {
        xed_uint32_t pp:2;      // compressed legacy prefix (none/66/F3/F2)
        xed_uint32_t l:1;       // vector length
        xed_uint32_t vvv210:3;  // low 3 bits of the (inverted) vvvv register
        xed_uint32_t v3:1;      // high bit of vvvv
        xed_uint32_t w:1;       // REX.W equivalent
        xed_uint32_t pad:24;
    } s;
    xed_uint32_t u32;
} xed_avx_c4_payload2_t;

typedef union { // C5 payload 1 (single-byte VEX payload)
    struct {
        xed_uint32_t pp:2;      // compressed legacy prefix
        xed_uint32_t l:1;       // vector length
        xed_uint32_t vvv210:3;  // low 3 bits of vvvv
        xed_uint32_t v3:1;      // high bit of vvvv
        xed_uint32_t r_inv:1;   // inverted REX.R extension
        xed_uint32_t pad:24;
    } s;
    xed_uint32_t u32;
} xed_avx_c5_payload_t;
372 
373 static void evex_vex_opcode_scanner(xed_decoded_inst_t* d); //prototype
374 
/* Scan a 3-byte VEX prefix starting with 0xC4. In 16/32b mode, C4 is only
 * a VEX escape when the next byte has its top two bits set (would be a
 * MODRM.MOD==3 of the legacy LES instruction); otherwise we return without
 * consuming anything so the legacy decode path proceeds. On success the
 * two payload bytes are decoded into REX/vvvv/vl/pp/map operands and the
 * opcode byte is consumed via evex_vex_opcode_scanner(). */
static void vex_c4_scanner(xed_decoded_inst_t* d)
{
    /* assumption: length < max_bytes
     * This is checked in prefix_scanner.
     * If any other scanner is added before vex_scanner, this condition
     * should be preserved.
     * FIXME: check length < max_bytes here anyway? This will be less
     * error-prone, but that's an additional non-necessary branch.
     */
    xed_uint8_t max_bytes = xed3_operand_get_max_bytes(d);
    unsigned char length  = xed_decoded_inst_get_length(d);
    if (xed3_mode_64b(d))   {
        length++; /* eat the c4/c5 */
    }
    else if (length+1 < max_bytes)   {
        xed_uint8_t n = xed_decoded_inst_get_byte(d, length+1);
        /* in 16/32b modes, the MODRM.MOD field MUST be 0b11 */
        if ((n&0xC0) == 0xC0)    {
            length++; /* eat the c4/c5 */
        }
        else   {
            /* A little optimization:
             * this is not a vex prefix, we can proceed to
             * next scanner */
            return;
        }
    }
    else  {   /* don't have enough bytes to check if it's vex prefix,
           * we are out of bytes */
        too_short(d);
        return ;
    }

    /* pointing at first payload byte. we want to make sure, that we have
     * additional 2 bytes available for reading - for 2nd vex c4 payload
     * byte and opcode */
    if (length + 2 < max_bytes) {
      xed_avx_c4_payload1_t c4byte1;
      xed_avx_c4_payload2_t c4byte2;

      c4byte1.u32 = xed_decoded_inst_get_byte(d, length);
      c4byte2.u32 = xed_decoded_inst_get_byte(d, length + 1);

      // these 2 are guaranteed to be 1 in 16/32b mode by above check,
      // so the complemented values are 0 there
      xed3_operand_set_rexr(d, ~c4byte1.s.r_inv&1);
      xed3_operand_set_rexx(d, ~c4byte1.s.x_inv&1);

      // REX.B only has meaning in 64b mode; mask it off otherwise
      xed3_operand_set_rexb(d, (xed3_mode_64b(d) & ~c4byte1.s.b_inv)&1);

      xed3_operand_set_rexw(d, c4byte2.s.w);

      xed3_operand_set_vexdest3(d,   c4byte2.s.v3);
      xed3_operand_set_vexdest210(d, c4byte2.s.vvv210);

      xed3_operand_set_vl(d,   c4byte2.s.l);

      // pp uses a different F2/F3 encoding than XED's internal one
      xed3_operand_set_vex_prefix(d, vex_prefix_recoding[c4byte2.s.pp]);

      xed3_operand_set_map(d,c4byte1.s.map);

      // FIXME: 2017-03-03 this masking of the VEX map with 0x3 an attempt
      // at matching an undocumented implementation convention that can and
      // most likely will change as architectural map usage evolves.
      if ((c4byte1.s.map & 0x3) == XED_ILD_MAP3)
          xed3_operand_set_imm_width(d, bytes2bits(1));

      // this is a success indicator for downstream decoding
      xed3_operand_set_vexvalid(d, 1); // AVX1/2

      length += 2; /* eat the c4 vex 2B payload */
      xed_decoded_inst_set_length(d, length);

      evex_vex_opcode_scanner(d);
      return;
    }
    else {
      /* We don't have 3 bytes available for reading, but we for sure
       * need to read them - for 2 vex payload bytes and opcode byte,
       * hence we are out of bytes.
       */
        xed_decoded_inst_set_length(d, length);
        too_short(d);
      return;
    }
}
460 
/* Scan a 2-byte VEX prefix starting with 0xC5. Mirrors vex_c4_scanner:
 * in 16/32b mode C5 is only a VEX escape when the next byte looks like
 * MODRM.MOD==3 (otherwise it is a legacy LDS). The single payload byte
 * carries r_inv/vvvv/l/pp; the map is implicitly map 1. */
static void vex_c5_scanner(xed_decoded_inst_t* d)
{
    /* assumption: length < max_bytes
     * This is checked in prefix_scanner.
     * If any other scanner is added before vex_scanner, this condition
     * should be preserved.
     * FIXME: check length < max_bytes here anyway? This will be less
     * error-prone, but that's an additional non-necessary branch.
     */
    xed_uint8_t max_bytes = xed3_operand_get_max_bytes(d);
    unsigned char length  = xed_decoded_inst_get_length(d);
    if (xed3_mode_64b(d))
    {
        length++; /* eat the c4/c5 */
    }
    else if (length + 1 < max_bytes)
    {
        xed_uint8_t n = xed_decoded_inst_get_byte(d, length+1);
        /* in 16/32b modes, the MODRM.MOD field MUST be 0b11 */
        if ((n&0xC0) == 0xC0)
        {
            length++; /* eat the c4/c5 */
        }
        else
        {
            /* A little optimization:
             * this is not a vex prefix, we can proceed to
             * next scanner */
            return;
        }
    }
    else
    {   /* don't have enough bytes to check if it's vex prefix,
         * we are out of bytes */
        too_short(d);
        return ;
    }


    /* pointing at vex c5 payload byte. we want to make sure, that we have
     * additional 2 bytes available for reading - for vex payload byte and
     * opcode */
    if (length + 1 < max_bytes) {
        xed_avx_c5_payload_t c5byte1;
        c5byte1.u32 = xed_decoded_inst_get_byte(d, length);

        xed3_operand_set_rexr(d, ~c5byte1.s.r_inv&1);
        xed3_operand_set_vexdest3(d,   c5byte1.s.v3);
        xed3_operand_set_vexdest210(d, c5byte1.s.vvv210);

        xed3_operand_set_vl(d,   c5byte1.s.l);
        xed3_operand_set_vex_prefix(d, vex_prefix_recoding[c5byte1.s.pp]);

        /* MAP is a special case - although it is a derived operand in
        * newvex_prexix(), we need to set it here, because we use map
        * later in ILD - for modrm, imm and disp
        */
        xed3_operand_set_map(d, XED_ILD_MAP1);

        // this is a success indicator for downstream decoding
        xed3_operand_set_vexvalid(d, 1); // AVX1/2

        length++;  /* eat the vex opcode payload */
        xed_decoded_inst_set_length(d, length);

        evex_vex_opcode_scanner(d);  //eats opcode byte
        return;
    }
    else {
        /* We don't have 2 bytes available for reading, but we need to read
         * them - for vex payload byte and opcode bytes, hence we are out
         * of bytes.
         */
        xed_decoded_inst_set_length(d, length);
        too_short(d);
        return ;
    }
}
539 
540 
541 #if defined(XED_AMD_ENABLED)
542 
get_modrm_reg_field(xed_uint8_t b)543 static XED_INLINE xed_uint_t get_modrm_reg_field(xed_uint8_t b) {
544   return (b & 0x38) >> 3;
545 }
546 
/* Scan an AMD XOP prefix starting with 0x8F. 0x8F is only an XOP escape
 * when MODRM.REG of the following byte is nonzero (REG==0 is the legacy
 * POP Ev encoding); otherwise we return without consuming anything. The
 * two payload bytes share the C4 VEX layout, so the same unions are used.
 * Valid XOP maps are 8, 9 and 0xA; each fixes the immediate width. */
static void xop_scanner(xed_decoded_inst_t* d)
{
    /* assumption: length < max_bytes
     * This is checked in prefix_scanner.
     * If any other scanner is added before vex_scanner, this condition
     * should be preserved.
     * FIXME: check length < max_bytes here anyway? This will be less
     * error-prone, but that's an additional non-necessary branch.
     */

    /* we don't need to check (d->length < d->max_bytes) because
     * it was already checked in previous scanner (prefix_scanner).
     */
    xed_uint8_t max_bytes = xed3_operand_get_max_bytes(d);
    unsigned char length = xed_decoded_inst_get_length(d);

    if (length + 1 < max_bytes)   {
        xed_uint8_t n = xed_decoded_inst_get_byte(d, length+1);
        /* in all modes, the MODRM.REG field MUST NOT be 0b000.
           mm-rrr-nnn -> mmrr_rnnn
         */

        if ( get_modrm_reg_field(n) != 0 ) {
            length++; /* eat the 0x8f */
        }
        else   {
            /* A little optimization: this is not an xop prefix, we can
             * proceed to next scanner */
            return;
        }
    }
    else  {
        /* don't have enough bytes to check if it's an xop prefix, we
         * are out of bytes */
        too_short(d);
        return ;
    }

    /* pointing at the first xop payload byte. we want to make sure, that
     * we have additional 2 bytes available for reading - for 2nd xop payload
     * byte and opcode */
    if (length + 2 < max_bytes)
    {
      xed_avx_c4_payload1_t xop_byte1;
      xed_avx_c4_payload2_t xop_byte2;
      xed_uint8_t map;
      xop_byte1.u32 = xed_decoded_inst_get_byte(d, length);
      xop_byte2.u32 = xed_decoded_inst_get_byte(d, length + 1);

      // recode the raw 5-bit map field to XED's internal map enum and set
      // the immediate width implied by each XOP map
      map = xop_byte1.s.map;
      if (map == 0x9) {
          xed3_operand_set_map(d,XED_ILD_MAP_XOP9);
          xed3_operand_set_imm_width(d, 0); //bits
      }
      else if (map == 0x8){
          xed3_operand_set_map(d,XED_ILD_MAP_XOP8);
          xed3_operand_set_imm_width(d, bytes2bits(1));
      }
      else if (map == 0xA){
          xed3_operand_set_map(d,XED_ILD_MAP_XOPA);
          xed3_operand_set_imm_width(d, bytes2bits(4));
      }
      else
          bad_map(d);


      // payload bits are stored inverted; complement on extraction
      xed3_operand_set_rexr(d, ~xop_byte1.s.r_inv&1);
      xed3_operand_set_rexx(d, ~xop_byte1.s.x_inv&1);
      xed3_operand_set_rexb(d, (xed3_mode_64b(d) & ~xop_byte1.s.b_inv)&1);

      xed3_operand_set_rexw(d, xop_byte2.s.w);

      xed3_operand_set_vexdest3(d, xop_byte2.s.v3);
      xed3_operand_set_vexdest210(d, xop_byte2.s.vvv210);

      xed3_operand_set_vl(d, xop_byte2.s.l);
      xed3_operand_set_vex_prefix(d, vex_prefix_recoding[xop_byte2.s.pp]);

      // vexvalid==3 marks this encoding as XOP downstream
      xed3_operand_set_vexvalid(d, 3);

      length += 2; /* eat the 8f xop 2B payload */
      /* FIXME: too hardcoded? maybe define graph data structure?*/
      /* using the VEX opcode scanner for xop opcodes too. */
      xed_decoded_inst_set_length(d, length);
      evex_vex_opcode_scanner(d);
      return;
    }
    else {
      /* We don't have 3 bytes available for reading, but we for sure
       * need to read them - for 2 vex payload bytes and opcode byte,
       * hence we are out of bytes.
       */
        xed_decoded_inst_set_length(d, length);
        too_short(d);
      return;
    }
}
644 #endif
645 #endif
646 
647 #if defined(XED_AVX)
648 
649 #  if defined(XED_AMD_ENABLED)
chip_is_intel_specific(xed_decoded_inst_t * d)650 static XED_INLINE xed_uint_t chip_is_intel_specific(xed_decoded_inst_t* d)
651 {
652     xed_chip_enum_t chip = xed_decoded_inst_get_input_chip(d);
653     if (chip == XED_CHIP_INVALID ||
654         chip == XED_CHIP_ALL     ||
655         chip == XED_CHIP_AMD)
656         return 0;
657     return 1;
658 }
659 #  endif
660 
661 
/* Dispatch on the byte at the current position: 0xC4/0xC5 are the AVX
 * VEX escapes, and (on AMD-enabled builds, when not pinned to an Intel
 * chip) 0x8F may start an AMD XOP prefix. Any other byte falls through
 * to the following scanners untouched. */
static void vex_scanner(xed_decoded_inst_t* d)
{
    unsigned char pos = xed_decoded_inst_get_length(d);
    xed_uint8_t first_byte = xed_decoded_inst_get_byte(d, pos);

    switch(first_byte) {
      case 0xC5:
        if (!xed3_operand_get_out_of_bytes(d))
            vex_c5_scanner(d);
        break;
      case 0xC4:
        if (!xed3_operand_get_out_of_bytes(d))
            vex_c4_scanner(d);
        break;
#if defined(XED_AMD_ENABLED)
      case 0x8f:
        if (chip_is_intel_specific(d)==0 &&
            !xed3_operand_get_out_of_bytes(d))
            xop_scanner(d);
        break;
#endif
      default:
        break;
    }
}
686 #endif
687 
// Consume the byte at the current position as the nominal opcode, and
// record SRM (the low 3 bits) for partial-opcode instructions.
static void get_next_as_opcode(xed_decoded_inst_t* d) {
    unsigned char pos = xed_decoded_inst_get_length(d);
    if (pos >= xed3_operand_get_max_bytes(d)) {
        too_short(d);   // no byte left for the opcode
        return;
    }
    {
        xed_uint8_t opcode = xed_decoded_inst_get_byte(d, pos);
        xed3_operand_set_nominal_opcode(d, opcode);
        // set SRM (partial opcode instructions need it)
        xed3_operand_set_srm(d, xed_modrm_rm(opcode));
        xed_decoded_inst_inc_length(d);
    }
}
701 
702 
703 // has_disp_regular[eamode][modrm.mod][modrm.rm]
704 static xed_uint8_t has_disp_regular[3][4][8];
705 
init_has_disp_regular_table(void)706 static void init_has_disp_regular_table(void) {
707     xed_uint8_t eamode;
708     xed_uint8_t rm;
709     xed_uint8_t mod;
710 
711     for (eamode = 0; eamode <3; eamode++)
712         for (mod=0; mod < 4; mod++)
713             for (rm=0; rm<8; rm++)
714                 has_disp_regular[eamode][mod][rm] = 0;
715 
716     //fill the eamode16
717     has_disp_regular[0][0][6] = 2;
718     for (rm = 0; rm < 8; rm++) {
719         for (mod = 1; mod <= 2; mod++)
720             has_disp_regular[0][mod][rm] = mod;
721     }
722 
723     //fill eamode32/64
724     for(eamode = 1; eamode <= 2; eamode++) {
725         for (rm = 0; rm < 8; rm++) {
726             has_disp_regular[eamode][1][rm] = 1;
727             has_disp_regular[eamode][2][rm] = 4;
728         };
729         has_disp_regular[eamode][0][5] = 4;
730 
731     }
732 }
733 
734 // eamode_table[asz][mmode]
735 static xed_uint8_t eamode_table[2][XED_GRAMMAR_MODE_64+1];
736 
init_eamode_table(void)737 static void init_eamode_table(void) {
738     xed_uint8_t mode;
739     xed_uint8_t asz;
740 
741     for (asz=0; asz<2; asz++)
742         for (mode=0; mode<XED_GRAMMAR_MODE_64+1; mode++)
743             eamode_table[asz][mode] = 0;
744 
745 
746     for (mode = XED_GRAMMAR_MODE_16; mode <= XED_GRAMMAR_MODE_64; mode ++) {
747         eamode_table[0][mode] = mode;
748     }
749 
750     eamode_table[1][XED_GRAMMAR_MODE_16] = XED_GRAMMAR_MODE_32;
751     eamode_table[1][XED_GRAMMAR_MODE_32] = XED_GRAMMAR_MODE_16;
752     eamode_table[1][XED_GRAMMAR_MODE_64] = XED_GRAMMAR_MODE_32;
753 }
754 
755 
756 // has_sib_table[eamode][modrm.mod][modrm.rm]
757 static xed_uint8_t has_sib_table[3][4][8];
758 
init_has_sib_table(void)759 static void init_has_sib_table(void) {
760     xed_uint8_t eamode;
761     xed_uint8_t mod;
762     xed_uint8_t rm;
763 
764     for (eamode = 0; eamode <3; eamode++)
765         for (mod=0; mod < 4; mod++)
766             for (rm=0; rm<8; rm++)
767                 has_sib_table[eamode][mod][rm] = 0;
768 
769     //for eamode32/64 there is sib byte for mod!=3 and rm==4
770     for(eamode = 1; eamode <= 2; eamode++) {
771         for (mod = 0; mod <= 2; mod++) {
772             has_sib_table[eamode][mod][4] = 1;
773         }
774     }
775 }
776 
777 
778 #if defined(XED_SUPPORTS_AVX512)
// Record an invalid EVEX.LL (vector length) encoding.
static void bad_ll(xed_decoded_inst_t* d) {
    xed3_operand_set_error(d,XED_ERROR_BAD_EVEX_LL);
}
782 
783 
// Validate the EVEX.LL field against the ModRM byte: LL==3 is only
// tolerated for reg-reg forms with rounding control (bcrc) set.
static void bad_ll_check(xed_decoded_inst_t* d)
{
    if (xed3_operand_get_llrc(d) != 3)
        return;  // LL is in the normal 0..2 range

    if (xed3_operand_get_mod(d) != 3) {
        bad_ll(d);   // memory operand: LL==3 is never legal
    }
    else if (xed3_operand_get_bcrc(d) == 0) {
        bad_ll(d);   // reg-reg but no embedded rounding requested
    }
}
794 #endif
795 
/* Consume the ModRM byte when the opcode requires one, splitting it into
 * the MOD/REG/RM operands, and use the (eamode, mod, rm) lookup tables to
 * decide the displacement width and whether a SIB byte follows. Special
 * opcodes (MOV to/from CR/DR) carry a ModRM byte whose MOD bits are
 * ignored and never have a displacement or SIB. */
static void modrm_scanner(xed_decoded_inst_t* d)
{
    xed_uint8_t b;
    xed_uint8_t has_modrm;
    set_has_modrm(d);

    has_modrm = xed3_operand_get_has_modrm(d);

    if (has_modrm) {
        unsigned char length = xed_decoded_inst_get_length(d);
        if (length < xed3_operand_get_max_bytes(d)) {
            xed_uint8_t eamode;
            xed_uint8_t mod;
            xed_uint8_t rm;

            b = xed_decoded_inst_get_byte(d, length);
            xed3_operand_set_modrm_byte(d, b);
            xed3_operand_set_pos_modrm(d, length);
            xed_decoded_inst_inc_length(d); /* eat modrm */

            mod = xed_modrm_mod(b);
            rm = xed_modrm_rm(b);
            xed3_operand_set_mod(d, mod);
            xed3_operand_set_rm(d, rm);
            xed3_operand_set_reg(d, xed_modrm_reg(b));

#if defined(XED_SUPPORTS_AVX512)
            // MOD is now known, so EVEX.LL can be validated
            bad_ll_check(d);
#endif
            /*This checks that we are not in MOV_DR or MOV_CR instructions
            that ignore MODRM.MOD bits and don't have DISP and SIB */
            if (has_modrm != XED_ILD_HASMODRM_IGNORE_MOD) {
                xed_uint8_t asz = xed3_operand_get_asz(d);
                xed_uint8_t mode = xed3_operand_get_mode(d);
                // KW complains here but it is stupid. Code is fine.
                eamode = eamode_table[asz][mode];

                xed_assert(eamode <= 2);

                /* opcode scanner (and prefix scanner) doesn't set
                disp_bytes, hence we can set it to 0  without worrying about
                overriding a value that was set earlier. */

                xed3_operand_set_disp_width(
                    d,
                    bytes2bits(has_disp_regular[eamode][mod][rm]));

                /*same with sib, we will not override data set earlier here*/
                xed3_operand_set_has_sib(d, has_sib_table[eamode][mod][rm]);


            }
            return;
        }
        else {
            /*need modrm, but length >= max_bytes, and we are out of bytes*/
            too_short(d);
            return;
        }

    }
    /*no modrm, set RM from nominal_opcode*/
    //rm = xed_modrm_rm(xed3_operand_get_nominal_opcode(d));
    //xed3_operand_set_rm(d, rm);

    /* a little optimization: we don't have modrm and hence don't have sib.
     * Hence we don't need to call sib scanner and can go straight to disp*/
    /*FIXME: Better to call next scanner anyway for better modularity?*/
}
865 
/* Consume the SIB byte when modrm_scanner flagged one, splitting it into
 * scale/index/base operands. The base==5/mod==0 form has no base register
 * and implies a disp32 that was not selected by ModRM processing. */
static void sib_scanner(xed_decoded_inst_t* d)
{
    unsigned char pos;
    xed_uint8_t sib;

    if (!xed3_operand_get_has_sib(d))
        return;  // no SIB byte for this instruction

    pos = xed_decoded_inst_get_length(d);
    if (pos >= xed3_operand_get_max_bytes(d)) {
        too_short(d);  /* has_sib but not enough length -> out of bytes */
        return;
    }

    sib = xed_decoded_inst_get_byte(d, pos);

    xed3_operand_set_pos_sib(d, pos);
    xed3_operand_set_sibscale(d, xed_sib_scale(sib));
    xed3_operand_set_sibindex(d, xed_sib_index(sib));
    xed3_operand_set_sibbase(d, xed_sib_base(sib));

    xed_decoded_inst_inc_length(d); /* eat sib */

    /* other mod values already picked their displacement in modrm
     * processing */
    if (xed_sib_base(sib) == 5 && xed3_operand_get_mod(d) == 0)
        xed3_operand_set_disp_width(d, bytes2bits(4));
}
894 
895 
896 
897 /*probably this table should be generated. Leaving it here for now.
898   Maybe in one of the following commits it will be moved to auto generated
899   code.*/
900 const xed_ild_l1_func_t* disp_bits_2d[XED_ILD_MAP2] = {
901     disp_width_map_0x0,
902     disp_width_map_0x0F
903 };
904 
/* Determine the displacement width (for maps 0/1 via generated L1
 * function tables; other maps were resolved by earlier scanners), then
 * read the displacement bytes, sign-extend the value into the DISP
 * operand, and advance the decode length past them. */
static void disp_scanner(xed_decoded_inst_t* d)
{
    /* ilog2[n] = log2(n) for n in {1,2,4,8}; 99 marks invalid sizes */
    /*                                   0   1  2  3   4  5   6   7   8 */
    static const xed_uint8_t ilog2[] = { 99 , 0, 1, 99, 2, 99, 99, 99, 3 };

    xed_ild_map_enum_t map = (xed_ild_map_enum_t)xed3_operand_get_map(d);
    xed_uint8_t opcode = xed3_operand_get_nominal_opcode(d);
    xed_uint8_t disp_bytes;
    xed_uint8_t length = xed_decoded_inst_get_length(d);
    /*Checked dumped tables of maps 2 ,3 and 3dnow:
      they all have standard displacement resolution, we are not going
      to use their lookup tables*/
  if (map < XED_ILD_MAP2) {
      /*get the L1 function pointer and use it */
        xed_ild_l1_func_t fptr = disp_bits_2d[map][opcode];
        /*most map-opcodes have disp_bytes set in modrm/sib scanners
          for those we  have L1 functions that do nothing*/
        if (fptr == 0){
            xed3_operand_set_error(d,XED_ERROR_GENERAL_ERROR);
            return;
        }
        (*fptr)(d);
  }
  /*All other maps should have been set earlier*/
  disp_bytes = bits2bytes(xed3_operand_get_disp_width(d));
  if (disp_bytes) {
      xed_uint8_t max_bytes = xed3_operand_get_max_bytes(d);
      if ((length + disp_bytes) <= max_bytes) {

          //set disp value
          const xed_uint8_t* itext = d->_byte_array._dec;
          xed_uint8_t* disp_ptr = (xed_uint8_t*)(itext + length);

          // sign extend the displacement to 64b while passing to accessor
          // NOTE(review): the multi-byte cases below read via casted
          // pointers, which assumes unaligned loads and little-endian
          // byte order are OK on the target — confirm for new ports.

          switch(ilog2[disp_bytes]) {
            case 0: { // 1B=8b. ilog2(1) = 0
                xed_int8_t byte = *(xed_int8_t*)disp_ptr;
                xed3_operand_set_disp(d, byte);
                break;
            }
            case 1: { // 2B=16b ilog2(2) = 1
                xed_int16_t word = *(xed_int16_t*)disp_ptr;
                xed3_operand_set_disp(d, word);
                break;
            }
            case 2: { // 4B=32b ilog2(4) = 2
                xed_int32_t dword = *(xed_int32_t*)disp_ptr;
                xed3_operand_set_disp(d, dword);
                break;
            }
            case 3: {// 8B=64b ilog2(8) = 3
                xed_int64_t qword = *(xed_int64_t*)disp_ptr;
                xed3_operand_set_disp(d, qword);
                break;
            }
            default:
              xed_assert(0); // disp_bytes outside {1,2,4,8}
          }

          xed3_operand_set_pos_disp(d, length);
          xed_decoded_inst_set_length(d, length + disp_bytes);
      }
      else {
          too_short(d);
          return;
      }
  }
}
974 
975 
976 
977 
978 #if defined(XED_EXTENDED)
979 # include "xed-ild-extension.h"
980 #endif
981 
/* Probably this table should be generated. Leaving it here for now.
   Maybe in one of the following commits it will be moved to auto generated
   code. */
/* Per-map has_modrm lookup tables, indexed as has_modrm_2d[map][opcode].
   Entries can hold codes beyond 0/1, e.g. XED_ILD_HASMODRM_IGNORE_MOD
   (see set_has_modrm). */
const xed_uint8_t* has_modrm_2d[XED_ILD_MAP2] = {
    has_modrm_map_0x0,
    has_modrm_map_0x0F
};
989 
/* Determine whether the instruction has a MODRM byte and record it in
   the HAS_MODRM operand.
   This assumes that the lookup arrays do not have undefined opcodes;
   it means we must fill the has_modrm property for illegal opcodes at
   build time (in ild.py).
   Some 3dnow instructions conflict on the has_modrm property with other
   instructions; however all 3dnow instructions have modrm, hence we
   simply report 1 for 3dnow (map==XED_ILD_MAPAMD) and for any other
   map beyond the lookup tables. */
static void set_has_modrm(xed_decoded_inst_t* d) {
    xed_ild_map_enum_t map = (xed_ild_map_enum_t)xed3_operand_get_map(d);
    if (map < XED_ILD_MAP2) {
        /* May produce complex codes like XED_ILD_HASMODRM_IGNORE_MOD
           from the has_modrm_2d[][] tables. */
        xed_uint8_t opcode = xed3_operand_get_nominal_opcode(d);
        xed3_operand_set_has_modrm(d, has_modrm_2d[map][opcode]);
    }
    else {
        xed3_operand_set_has_modrm(d, 1);
    }
}
1006 
1007 
1008 
/* Probably this table should be generated. Leaving it here for now.
   Maybe in one of the following commits it will be moved to auto generated
   code. */
/* Per-map L1 dispatch tables for immediate-width resolution, indexed
   as imm_bits_2d[map][opcode]. Used by set_imm_bytes for maps 0 and
   0x0F when the immediate width was not set by an earlier scanner. */
const xed_ild_l1_func_t* imm_bits_2d[XED_ILD_MAP2] = {
    imm_width_map_0x0,
    imm_width_map_0x0F
};
1016 
/* Resolve the width (in bits) of the immediate operand, unless an
   earlier scanner already established it. For maps 0 and 0x0F the
   width comes from the imm_bits_2d[][] L1 lookup tables; all other
   maps must have been handled before this point. */
static void set_imm_bytes(xed_decoded_inst_t* d) {
    /* nothing to do when the width was set earlier */
    if (xed3_operand_get_imm_width(d))
        return;

    xed_ild_map_enum_t map = (xed_ild_map_enum_t)xed3_operand_get_map(d);
    if (map < XED_ILD_MAP2) {
        /* FIXME: not taking care of illegal map-opcodes yet.
           Probably should fill them in ild_storage.py.
           Now illegal map-opcodes have 0 as function pointer in the
           lookup tables. */
        xed_uint8_t opcode = xed3_operand_get_nominal_opcode(d);
        xed_ild_l1_func_t fptr = imm_bits_2d[map][opcode];
        if (fptr == 0) {
            xed3_operand_set_error(d, XED_ERROR_GENERAL_ERROR);
            return;
        }
        (*fptr)(d);
    }
    /* all other maps should have been set earlier */
}
1038 
1039 ////////////////////////////////////////////////////////////////////////////////
1040 
1041 #if !defined(XED_SUPPORTS_AVX512) && !defined(XED_SUPPORTS_KNC)
/* Scan the immediate byte(s) of the instruction (build without
   AVX512/KNC support; see evex_imm_scanner otherwise).
   Resolves the immediate width via set_imm_bytes, records the
   position(s) of imm and the optional imm1, consumes the bytes and
   captures the UIMM0/UIMM1 operand values. For AMD 3DNow! the
   "immediate" byte is really the opcode and is handled specially.
   Flags too_short when the buffer ends inside an immediate.
   NOTE: multi-byte UIMM0 reads use memcpy instead of pointer casts to
   avoid misaligned access and strict-aliasing UB; the value read is
   identical (host byte order) to the previous *(xed_uintNN_t*) casts. */
static void imm_scanner(xed_decoded_inst_t* d)
{
  xed_uint8_t imm_bytes;
  xed_uint8_t imm1_bytes;
  xed_uint8_t max_bytes = xed3_operand_get_max_bytes(d);
  unsigned char length = xed_decoded_inst_get_length(d);
  unsigned int pos_imm = 0;
  const xed_uint8_t* itext = d->_byte_array._dec;
  const xed_uint8_t* imm_ptr = 0;

  set_imm_bytes(d);
#if defined(XED_AMD_ENABLED)
  if (xed3_operand_get_amd3dnow(d)) {
      if (length < max_bytes) {
         /* opcode is in immediate */
          xed3_operand_set_nominal_opcode(d,
              xed_decoded_inst_get_byte(d, length));
          /* count the pseudo immediate byte, which is the opcode */
          xed_decoded_inst_inc_length(d);
          /* imm_bytes == imm1_bytes == 0 for amd3dnow */
          return;
      }
      else {
          too_short(d);
          return;
      }
  }
#endif

  imm_bytes = bits2bytes(xed3_operand_get_imm_width(d));
  imm1_bytes = xed3_operand_get_imm1_bytes(d);

  if (imm_bytes)  {
      if (length + imm_bytes <= max_bytes) {
          xed3_operand_set_pos_imm(d, length);
          /* eat imm */
          length += imm_bytes;
          xed_decoded_inst_set_length(d, length);

          if (imm1_bytes)  {
              if (length + imm1_bytes <= max_bytes) {
                  xed3_operand_set_pos_imm1(d, length);
                  imm_ptr = itext + length;
                  length += imm1_bytes; /* eat imm1 */
                  xed_decoded_inst_set_length(d, length);
                  /* set uimm1 value */
                  xed3_operand_set_uimm1(d, *imm_ptr);
              }
              else {
                    too_short(d);
                    return;
              }
            }
      }
      else {
          too_short(d);
          return;
      }
  }

  /* FIXME: setting UIMM chunks. This can be done better,
   * for example a special capturing function in ILD, like for imm_bytes */
  pos_imm = xed3_operand_get_pos_imm(d);
  imm_ptr = itext + pos_imm;
  switch(imm_bytes){
  case 0:
      break;
  case 1: {
      xed_uint8_t uimm0 = *imm_ptr;
      xed3_operand_set_uimm0(d, uimm0);
      /* for SE_IMM8() we need to set the esrc here as well */
      xed3_operand_set_esrc(d, uimm0 >> 4);
      break;
  }
  case 2: {
      xed_uint16_t uimm0;
      memcpy(&uimm0, imm_ptr, sizeof(uimm0));
      xed3_operand_set_uimm0(d, uimm0);
      break;
  }
  case 4: {
      xed_uint32_t uimm0;
      memcpy(&uimm0, imm_ptr, sizeof(uimm0));
      xed3_operand_set_uimm0(d, uimm0);
      break;
  }
  case 8: {
      xed_uint64_t uimm0;
      memcpy(&uimm0, imm_ptr, sizeof(uimm0));
      xed3_operand_set_uimm0(d, uimm0);
      break;
  }
  default:
      /* unexpected immediate width, this should never happen */
      xed_assert(0);
  }

  /* uimm1 is set earlier */
}
1139 #endif // !defined(XED_SUPPORTS_AVX512) && !defined(XED_SUPPORTS_KNC)
1140 ////////////////////////////////////////////////////////////////////////////////
1141 
1142 #if defined(XED_AVX)
/* REX, 66, F2 and F3 prefixes are not allowed before a VEX or EVEX
   prefix. A REX violation (64b mode only) takes priority over a
   legacy-prefix violation. */
static void catch_invalid_rex_or_legacy_prefixes(xed_decoded_inst_t* d)
{
    if (xed3_mode_64b(d) && xed3_operand_get_rex(d)) {
        xed3_operand_set_error(d, XED_ERROR_BAD_REX_PREFIX);
        return;
    }
    if (xed3_operand_get_osz(d)    ||
        xed3_operand_get_ild_f3(d) ||
        xed3_operand_get_ild_f2(d))
    {
        xed3_operand_set_error(d, XED_ERROR_BAD_LEGACY_PREFIX);
    }
}
/* Flag an error for VEX/EVEX-encoded instructions in real mode.
   Called only once we know we have a VEX or EVEX instruction. */
static void catch_invalid_mode(xed_decoded_inst_t* d)
{
    // we know we have VEX or EVEX instr.
    if(xed3_operand_get_realmode(d)) {
        xed3_operand_set_error(d,XED_ERROR_INVALID_MODE);
    }
}
1160 
/* Capture the single opcode byte that follows a VEX or EVEX prefix and
   validate the prefix context. No max_bytes check is needed here: the
   previous scanner already guaranteed this byte is available. */
static void evex_vex_opcode_scanner(xed_decoded_inst_t* d)
{
    unsigned char pos = xed_decoded_inst_get_length(d);
    xed3_operand_set_nominal_opcode(d, xed_decoded_inst_get_byte(d, pos));
    xed3_operand_set_pos_nominal_opcode(d, pos);
    xed_decoded_inst_inc_length(d);      /* consume the opcode byte */
    catch_invalid_rex_or_legacy_prefixes(d);
    catch_invalid_mode(d);
}
1173 #endif
1174 
/* Scan the (legacy, non-VEX) opcode byte(s) and determine the opcode
   map: a single byte for map 0, or 0x0F-escaped sequences for maps
   1/2/3 (and the AMD 3DNow! 0x0F 0x0F form when enabled). Records the
   nominal opcode, its position and the SRM field; flags too_short or
   bad_map on malformed input. */
static void opcode_scanner(xed_decoded_inst_t* d)
{
    unsigned char length = xed_decoded_inst_get_length(d);
    xed_uint8_t b = xed_decoded_inst_get_byte(d, length);
    xed_uint8_t opcode = 0;

    /*no need to check max_bytes - it was checked in previous scanners*/

    /* no need to check for VEX here anymore, because if VEX
    prefix was encountered, we would get to evex_vex_opcode_scanner */
    if (b != 0x0F) {
        /* one-byte opcode: map 0 */
        xed3_operand_set_map(d, XED_ILD_MAP0);
        xed3_operand_set_nominal_opcode(d, b);
        xed3_operand_set_pos_nominal_opcode(d, length);
        xed_decoded_inst_inc_length(d);
        goto out;
    }

    length++; /* eat the 0x0F */
    xed3_operand_set_pos_nominal_opcode(d, length);

    /* 0x0F opcodes MAPS 1,2,3 */
    //FIXME: finish here
    if (length < xed3_operand_get_max_bytes(d)) {
        xed_uint8_t m = xed_decoded_inst_get_byte(d, length);
        if (m == 0x38) {
            /* 0x0F 0x38: map 2, opcode is the next byte */
            length++; /* eat the 0x38 */
            xed3_operand_set_map(d, XED_ILD_MAP2);
            xed_decoded_inst_set_length(d, length);
            get_next_as_opcode( d);
            return;
        }
        else if (m == 0x3A) {
            /* 0x0F 0x3A: map 3, always carries a 1-byte immediate */
            length++; /* eat the 0x3A */
            xed3_operand_set_map(d, XED_ILD_MAP3);
            xed_decoded_inst_set_length(d, length);
            xed3_operand_set_imm_width(d, bytes2bits(1));
            get_next_as_opcode( d);
            return;
        }
        else if (m == 0x3B) {
            /* 0x0F 0x3B: reserved/invalid map */
            length++; /* eat the 0x3B */
            bad_map(d);
            xed_decoded_inst_set_length(d, length);
            get_next_as_opcode( d);
            return;
            //FIXME: TNI maps have no modrm, imm, disp ??
            /* BTW we use MAP as index to static decoding lookup tables..
             * with INVALID_MAP we will have segv there, need to check
             * for it after ILD phase.
             * Maybe set some common ILD_INVALID member to indicate that
             * there is no need to do static decoding?
             */
        }
        else if (m > 0x38 && m <= 0x3F) {
            /* 0x0F 0x39, 0x3C..0x3F: reserved/invalid maps */
            length++; /* eat the 0x39...0x3F (minus 3A and 3B) */
            bad_map(d);

            xed_decoded_inst_set_length(d, length);
            get_next_as_opcode( d);
            return; //FIXME: TNI maps have no modrm, imm, disp ??
            /* BTW we use MAP as index static decoding lookup tables..
             * with INVALID_MAP we will have segv there, need to check
             * for it after ILD phase */
        }
#if defined(XED_AMD_ENABLED)
        else if (m == 0x0F) {
            /* 0x0F 0x0F: AMD 3DNow! -- the real opcode is carried in
               the immediate byte and is captured by the imm scanner */
            xed3_operand_set_amd3dnow(d, 1);
            /* opcode is in immediate later on */
            length++; /*eat the second 0F */
            xed3_operand_set_nominal_opcode(d, 0x0F);
             /*special map for amd3dnow */
            xed3_operand_set_map(d, XED_ILD_MAPAMD);
            xed_decoded_inst_set_length(d, length);
        }
#endif
        else {
            /* plain two-byte opcode: map 1 */
            length++; /* eat the 2nd  opcode byte */
            xed3_operand_set_nominal_opcode(d, m);
            xed3_operand_set_map(d, XED_ILD_MAP1);
            xed_decoded_inst_set_length(d, length);
        }
    }
    else{
        too_short(d);
        return;
    }

out:
    //set SRM (partial opcode instructions need it)
    opcode = xed3_operand_get_nominal_opcode(d);
    xed3_operand_set_srm(d, xed_modrm_rm(opcode));
}
1268 
1269 //////////////////////////////////////////////////////////////////////////
1270 // KNC/AVX512 EVEX and EVEX-IMM8 scanners
1271 
1272 
1273 
1274 #if defined(XED_SUPPORTS_AVX512) || defined(XED_SUPPORTS_KNC)
1275 
/* First EVEX payload byte (the byte after 0x62); layout common to
   KNC and AVX512. The r/x/b/rr fields are stored inverted in the
   encoding (hence the *_inv names) and are un-inverted by evex_scanner. */
typedef union { // Common KNC & AVX512
    struct {
        xed_uint32_t map:4;     // opcode map selector
        xed_uint32_t rr_inv:1;  // inverted REXRR (R')
        xed_uint32_t b_inv:1;   // inverted REXB
        xed_uint32_t x_inv:1;   // inverted REXX
        xed_uint32_t r_inv:1;   // inverted REXR
        xed_uint32_t pad:24;
    } s;
    xed_uint32_t u32;
} xed_avx512_payload1_t;
1287 
/* Second EVEX payload byte; layout common to KNC and AVX512.
   ubit distinguishes AVX512 (U=1) from KNC (U=0) encodings. */
typedef union { // Common KNC & AVX512
    struct {
        xed_uint32_t pp:2;          // compressed legacy prefix (none/66/F3/F2)
        xed_uint32_t ubit:1;        // U bit: 1=AVX512, 0=KNC
        xed_uint32_t vexdest210:3;  // low 3 bits of (inverted) vvvv
        xed_uint32_t vexdest3:1;    // bit 3 of (inverted) vvvv
        xed_uint32_t rexw:1;        // W bit
        xed_uint32_t pad:24;
    } s;
    xed_uint32_t u32;
} xed_avx512_payload2_t;
1299 
/* Third EVEX payload byte, KNC (U=0) interpretation. */
typedef union{  // KNC only
    struct  {
        xed_uint32_t mask:3;      // write-mask register (aaa)
        xed_uint32_t vexdest4p:1; // inverted bit 4 of vvvv (V')
        xed_uint32_t swiz:3;      // KNC swizzle/conversion selector
        xed_uint32_t nr:1;        // KNC no-rounding bit
        xed_uint32_t pad:24;
    } s;
    xed_uint32_t u32;
} xed_knc_payload3_t;
1310 
1311 
/* Third EVEX payload byte, AVX512 (U=1) interpretation. */
typedef union{  // AVX512 only
    struct  {
        xed_uint32_t mask:3;      // write-mask register (aaa)
        xed_uint32_t vexdest4p:1; // inverted bit 4 of vvvv (V')
        xed_uint32_t bcrc:1;      // broadcast / rounding-control / SAE bit (b)
        xed_uint32_t llrc:2;      // vector length or rounding control (L'L)
        xed_uint32_t z:1;         // zeroing (vs merging) masking
        xed_uint32_t pad:24;
    } s;
    xed_uint32_t u32;
} xed_avx512_payload3_t;
1323 
1324 
/* Scan for an EVEX prefix (0x62). Distinguishes EVEX from the legacy
   BOUND instruction in 16/32b mode, decodes the three payload bytes
   into the xed3 operands (map, rex bits, vvvv, pp, U bit, and either
   AVX512 or KNC byte-3 fields), then captures the opcode byte. */
static void evex_scanner(xed_decoded_inst_t* d)
{
     /* assumption: length < max_bytes
     * This is checked in prefix_scanner.
     * If any other scanner is added before evex_scanner, this condition
     * should be preserved.
     * FIXME: check length < max_bytes here anyway? This will be less
     * error-prone, but that's an additional non-necessary branch.
     */
    xed_uint8_t max_bytes = xed3_operand_get_max_bytes(d);
    unsigned char length = xed_decoded_inst_get_length(d);
    xed_uint8_t b = xed_decoded_inst_get_byte(d, length);

    if (b == 0x62)
    {
        /*first check that it is not a BOUND instruction */
        if(!xed3_mode_64b(d)) {
            /*make sure we can read one additional byte */
            if (length + 1 < max_bytes) {
                xed_uint8_t n = xed_decoded_inst_get_byte(d, length+1);
                /* 0x62 with modrm.mod != 3 is BOUND in 16/32b mode */
                if ((n&0xC0) != 0xC0) {
                    /*this is a BOUND instruction */
                    /* FIXME: could have set opcode here and call
                     * modrm_scanner but that would be a code
                     * duplication */
                    return;
                }
            }
            else {
                too_short(d);
                return;
            }
        }
        /*Unlike the vex and xop prefix scanners, here length is pointing
        at the evex prefix byte.  We want to ensure that we have enough
        bytes available to read 4 bytes for evex prefix and 1 byte for an
        opcode */
        if (length + 4 < max_bytes) {
            xed_avx512_payload1_t evex1;
            xed_avx512_payload2_t evex2;

            evex1.u32 = xed_decoded_inst_get_byte(d, length+1);
            evex2.u32 = xed_decoded_inst_get_byte(d, length+2);

            // above check guarantees that r and x are 1 in 16/32b mode.
            if (xed3_mode_64b(d)) {
                /* the encoded bits are inverted; un-invert them here */
                xed3_operand_set_rexr(d,  ~evex1.s.r_inv&1);
                xed3_operand_set_rexx(d,  ~evex1.s.x_inv&1);
                xed3_operand_set_rexb(d,  ~evex1.s.b_inv&1);
                xed3_operand_set_rexrr(d, ~evex1.s.rr_inv&1);
            }

            xed3_operand_set_map(d, evex1.s.map);

            xed3_operand_set_rexw(d,   evex2.s.rexw);
            xed3_operand_set_vexdest3(d,  evex2.s.vexdest3);
            xed3_operand_set_vexdest210(d, evex2.s.vexdest210);
            xed3_operand_set_ubit(d, evex2.s.ubit);
            if (evex2.s.ubit)
                xed3_operand_set_vexvalid(d, 2); // AVX512 EVEX U=1 req'd
            else
            {
#if defined(XED_SUPPORTS_KNC)
                xed3_operand_set_vexvalid(d, 4); // KNC EVEX U=0 req'd
#else
                xed3_operand_set_error(d,XED_ERROR_BAD_EVEX_UBIT);
#endif
            }

            xed3_operand_set_vex_prefix(d,vex_prefix_recoding[evex2.s.pp]);

            /* map 3 instructions always carry a 1-byte immediate */
            if (evex1.s.map == XED_ILD_MAP3)
                xed3_operand_set_imm_width(d, bytes2bits(1));

            if (evex2.s.ubit)  // AVX512 only (Not KNC)
            {
#if defined(XED_SUPPORTS_AVX512)
                xed_avx512_payload3_t evex3;
                evex3.u32 = xed_decoded_inst_get_byte(d, length+3);

                xed3_operand_set_zeroing(d, evex3.s.z);

                // llrc is still required for rounding fixup much later
                // during decode.
                xed3_operand_set_llrc(d, evex3.s.llrc);

                xed3_operand_set_vl(d, evex3.s.llrc);
                xed3_operand_set_bcrc(d, evex3.s.bcrc);
                xed3_operand_set_vexdest4(d, ~evex3.s.vexdest4p&1);
                /* V' must be 0 (encoded) outside 64b mode */
                if (!xed3_mode_64b(d) && evex3.s.vexdest4p==0)
                    bad_v4(d);

                xed3_operand_set_mask(d, evex3.s.mask);
                /* zeroing with no write mask (aaa=000) is invalid */
                if (evex3.s.mask == 0 && evex3.s.z == 1)
                    bad_z_aaa(d);
#endif
            }
#if defined(XED_SUPPORTS_KNC)
            else // KNC
            {
                const xed_uint_t vl_512=2;
                xed_knc_payload3_t evex3;
                evex3.u32 = xed_decoded_inst_get_byte(d, length+3);
                xed3_operand_set_vl(d, vl_512); //Indicates vector length 512b

                xed3_operand_set_nr(d, evex3.s.nr);
                xed3_operand_set_swiz(d, evex3.s.swiz);
                xed3_operand_set_vexdest4(d, ~evex3.s.vexdest4p&1);
                xed3_operand_set_mask(d, evex3.s.mask);
            }
#endif

            length += 4;
            xed_decoded_inst_set_length(d, length);
            /* vex opcode scanner fits for evex instructions too: it just reads
             * one byte as nominal opcode, this is exactly what we want for
             * evex*/
            evex_vex_opcode_scanner(d);
        }
        else {
            /*there is no enough bytes, hence we are out of bytes */
            too_short(d);
        }
    }
}
1450 
/* Scan the immediate byte(s) of the instruction (AVX512/KNC build;
   same contract as imm_scanner in other builds).
   Resolves the immediate width via set_imm_bytes, records the
   position(s) of imm and the optional imm1, consumes the bytes and
   captures the UIMM0/UIMM1 operand values. For AMD 3DNow! the
   "immediate" byte is really the opcode and is handled specially.
   Flags too_short when the buffer ends inside an immediate.
   NOTE: multi-byte UIMM0 reads use memcpy instead of pointer casts to
   avoid misaligned access and strict-aliasing UB; the value read is
   identical (host byte order) to the previous *(xed_uintNN_t*) casts. */
static void evex_imm_scanner(xed_decoded_inst_t* d)
{
  xed_uint8_t imm_bytes;
  xed_uint8_t imm1_bytes;
  xed_uint8_t max_bytes = xed3_operand_get_max_bytes(d);
  unsigned char length = xed_decoded_inst_get_length(d);
  unsigned int pos_imm = 0;
  const xed_uint8_t* itext = d->_byte_array._dec;
  const xed_uint8_t* imm_ptr = 0;

  set_imm_bytes(d);

#if defined(XED_AMD_ENABLED)
  if (xed3_operand_get_amd3dnow(d)) {
      if (length < max_bytes) {
         /* opcode is in immediate */
          xed3_operand_set_nominal_opcode(d,
              xed_decoded_inst_get_byte(d, length));
          /* count the pseudo immediate byte, which is the opcode */
          xed_decoded_inst_inc_length(d);
          /* imm_bytes == imm1_bytes == 0 for amd3dnow */
          return;
      }
      else {
          too_short(d);
          return;
      }
  }
#endif

  imm_bytes = bits2bytes(xed3_operand_get_imm_width(d));
  imm1_bytes = xed3_operand_get_imm1_bytes(d);

  if (imm_bytes)  {
      if (length + imm_bytes <= max_bytes) {
          xed3_operand_set_pos_imm(d, length);
          /* eat imm */
          length += imm_bytes;
          xed_decoded_inst_set_length(d, length);

          if (imm1_bytes)  {
              if (length + imm1_bytes <= max_bytes) {
                  xed3_operand_set_pos_imm1(d, length);
                  imm_ptr = itext + length;
                  length += imm1_bytes; /* eat imm1 */
                  xed_decoded_inst_set_length(d, length);
                  /* set uimm1 value */
                  xed3_operand_set_uimm1(d, *imm_ptr);
              }
              else {
                    too_short(d);
                    return;
              }
            }
      }
      else {
          too_short(d);
          return;
      }
  }

  /* FIXME: setting UIMM chunks. This can be done better,
   * for example a special capturing function in ILD, like for imm_bytes */
  pos_imm = xed3_operand_get_pos_imm(d);
  imm_ptr = itext + pos_imm;
  switch(imm_bytes)
  {
  case 0:
      break;
  case 1:
    {
        xed_uint8_t uimm0 =  *imm_ptr;
        xed_uint8_t esrc = uimm0 >> 4;

        /* for SE_IMM8() we need the esrc as well */
        xed3_operand_set_uimm0(d, uimm0);
        xed3_operand_set_esrc(d, esrc);
        break;
    }
  case 2: {
      xed_uint16_t uimm0;
      memcpy(&uimm0, imm_ptr, sizeof(uimm0));
      xed3_operand_set_uimm0(d, uimm0);
      break;
  }
  case 4: {
      xed_uint32_t uimm0;
      memcpy(&uimm0, imm_ptr, sizeof(uimm0));
      xed3_operand_set_uimm0(d, uimm0);
      break;
  }
  case 8: {
      xed_uint64_t uimm0;
      memcpy(&uimm0, imm_ptr, sizeof(uimm0));
      xed3_operand_set_uimm0(d, uimm0);
      break;
  }
  default:
      /* unexpected immediate width, this should never happen */
      xed_assert(0);
  }

  /* uimm1 is set earlier */
}
1545 
1546 #endif // defined(XED_SUPPORTS_AVX512)
1547 
1548 ////////////////////////////////////////////////////////////////////////////////
1549 
/* Initialize the ILD lookup tables (eosz/easz resolution, the L3
   imm/disp tables, and the has_disp/eamode/has_sib tables).
   Called from xed_ild_init. */
void xed_ild_lookup_init(void) {
    xed_ild_eosz_init();
    xed_ild_easz_init();

    xed_ild_imm_l3_init();
    xed_ild_disp_l3_init();

    init_has_disp_regular_table();
    init_eamode_table();
    init_has_sib_table();

}
1562 
/* One-time initialization of the instruction length decoder:
   prefix table plus all ILD lookup tables. */
void xed_ild_init(void) {
    init_prefix_table();
    xed_ild_lookup_init();
}
1567 
1568 
1569 
1570 
/* Run the ILD scanner pipeline over the instruction bytes:
   prefixes, VEX/XOP, EVEX, opcode, modrm, sib, displacement and
   immediates, bailing out as soon as the input runs out of bytes.
   The scanner order is significant (see the assumption comments in
   the individual scanners). */
void
xed_instruction_length_decode(xed_decoded_inst_t* ild)
{
    prefix_scanner(ild);
#if defined(XED_AVX)
    if (xed3_operand_get_out_of_bytes(ild))
        return;
    vex_scanner(ild);
#endif
#if defined(XED_SUPPORTS_AVX512) || defined(XED_SUPPORTS_KNC)
    // if we got a vex prefix (which also sucks down the opcode),
    // then we do not need to scan for evex prefixes.
    if (!xed3_operand_get_vexvalid(ild)) {
        if (xed3_operand_get_out_of_bytes(ild))
            return;
        evex_scanner(ild);
    }
#endif

    if (xed3_operand_get_out_of_bytes(ild))
        return;
#if defined(XED_AVX)
    // vex/xop prefixes also eat the vex/xop opcode
    if (!xed3_operand_get_vexvalid(ild))
        opcode_scanner(ild);
#else
    opcode_scanner(ild);
#endif
    modrm_scanner(ild);
    sib_scanner(ild);
    disp_scanner(ild);
#if defined(XED_SUPPORTS_AVX512) || defined(XED_SUPPORTS_KNC)
    evex_imm_scanner(ild);
#else
    imm_scanner(ild);
#endif
}
1608 
1609 #include "xed-chip-modes.h"
1610 
1611 /// This is the second main entry point for the decoder
1612 /// used for new xed3 decoding.
1613 XED_DLL_EXPORT xed_error_enum_t
xed_ild_decode(xed_decoded_inst_t * xedd,const xed_uint8_t * itext,const unsigned int bytes)1614 xed_ild_decode(xed_decoded_inst_t* xedd,
1615            const xed_uint8_t* itext,
1616            const unsigned int bytes)
1617 {
1618     xed_uint_t tbytes;
1619     xed_chip_enum_t chip = xed_decoded_inst_get_input_chip(xedd);
1620 
1621     set_chip_modes(xedd,chip,0); //FIXME: add support for cpuid features
1622 
1623     xedd->_byte_array._dec = itext;
1624 
1625     tbytes =  bytes;
1626     if (bytes > XED_MAX_INSTRUCTION_BYTES)
1627         tbytes = XED_MAX_INSTRUCTION_BYTES;
1628     xed3_operand_set_max_bytes(xedd, tbytes);
1629     xed_instruction_length_decode(xedd);
1630 
1631     if (xed3_operand_get_out_of_bytes(xedd))
1632         return XED_ERROR_BUFFER_TOO_SHORT;
1633     if (xed3_operand_get_map(xedd) == XED_ILD_MAP_INVALID)
1634         return XED_ERROR_GENERAL_ERROR;
1635 
1636     return XED_ERROR_NONE;
1637 }
1638 
1639 
1640 // xed-ild-private.h
1641 xed_bits_t
xed_ild_cvt_mode(xed_machine_mode_enum_t mmode)1642 xed_ild_cvt_mode(xed_machine_mode_enum_t mmode) {
1643 
1644     xed_bits_t result = 0;
1645     switch(mmode)
1646     {
1647       case XED_MACHINE_MODE_LONG_64:
1648         result = XED_GRAMMAR_MODE_64;
1649 
1650         break;
1651       case XED_MACHINE_MODE_LEGACY_32:
1652       case XED_MACHINE_MODE_LONG_COMPAT_32:
1653         result  = XED_GRAMMAR_MODE_32;
1654         break;
1655 
1656       case XED_MACHINE_MODE_REAL_16:
1657       case XED_MACHINE_MODE_LEGACY_16:
1658       case XED_MACHINE_MODE_LONG_COMPAT_16:
1659         result = XED_GRAMMAR_MODE_16;
1660         break;
1661       default:
1662         xed_derror("Bad machine mode in xed_ild_cvt_mode() call");
1663     }
1664     return result;
1665 }
1666 
1667