xref: /qemu/target/i386/tcg/decode-new.c.inc (revision 73b49878)
1/*
2 * New-style decoder for i386 instructions
3 *
4 *  Copyright (c) 2022 Red Hat, Inc.
5 *
6 * Author: Paolo Bonzini <pbonzini@redhat.com>
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 */
21
22/*
23 * The decoder is mostly based on tables copied from the Intel SDM.  As
24 * a result, most operand load and writeback is done entirely in common
25 * table-driven code using the same operand type (X86_TYPE_*) and
26 * size (X86_SIZE_*) codes used in the manual.  There are a few differences
27 * though.
28 *
29 * Operand sizes
30 * -------------
31 *
32 * The manual lists d64 ("cannot encode 32-bit size in 64-bit mode") and f64
33 * ("cannot encode 16-bit or 32-bit size in 64-bit mode") as modifiers of the
34 * "v" or "z" sizes.  The decoder simply makes them separate operand sizes.
35 *
36 * Vector operands
37 * ---------------
38 *
39 * The main difference is that the V, U and W types are extended to
40 * cover MMX as well; if an instruction is like
41 *
42 *      por   Pq, Qq
43 *  66  por   Vx, Hx, Wx
44 *
45 * only the second row is included and the instruction is marked as a
46 * valid MMX instruction.  The MMX flag directs the decoder to rewrite
47 * the V/U/H/W types to P/N/P/Q if there is no prefix, as well as changing
48 * "x" to "q" if there is no prefix.
49 *
50 * In addition, the ss/ps/sd/pd types are sometimes mushed together as "x"
51 * if the difference is expressed via prefixes.  Individual instructions
52 * are separated by prefix in the generator functions.
53 *
54 * There is a custom size "xh" used to address half of a SSE/AVX operand.
55 * This points to a 64-bit operand for SSE operations, 128-bit operand
56 * for 256-bit AVX operands, etc.  It is used for conversion operations
57 * such as VCVTPH2PS or VCVTSS2SD.
58 *
 * There are a couple of cases in which instructions (e.g. MOVD) write the
 * whole XMM or MM register but are listed incorrectly in the manual
 * as "d" or "q".  These have to be fixed for the decoder to work correctly.
62 *
63 * VEX exception classes
64 * ---------------------
65 *
66 * Speaking about imprecisions in the manual, the decoder treats all
67 * exception-class 4 instructions as having an optional VEX prefix, and
68 * all exception-class 6 instructions as having a mandatory VEX prefix.
69 * This is true except for a dozen instructions; these are in exception
70 * class 4 but do not ignore the VEX.W bit (which does not even exist
71 * without a VEX prefix).  These instructions are mostly listed in Intel's
72 * table 2-16, but with a few exceptions.
73 *
 * The AMD manual has more precise subclasses for exceptions, and unlike Intel
 * it lists the VEX.W requirements in the exception classes as well (except
 * when it doesn't).  AMD describes class 6 as "AVX Mixed Memory Argument"
 * without defining what a mixed memory argument is, but still uses 4 as the
 * primary exception class... except when it doesn't.
79 *
80 * The summary is:
81 *                       Intel     AMD         VEX.W           note
82 * -------------------------------------------------------------------
83 * vpblendd              4         4J          0
84 * vpblendvb             4         4E-X        0               (*)
85 * vpbroadcastq          6         6D          0               (+)
86 * vpermd/vpermps        4         4H          0               (§)
87 * vpermq/vpermpd        4         4H-1        1               (§)
88 * vpermilpd/vpermilps   4         6E          0               (^)
89 * vpmaskmovd            6         4K          significant     (^)
90 * vpsllv                4         4K          significant
91 * vpsrav                4         4J          0
92 * vpsrlv                4         4K          significant
93 * vtestps/vtestpd       4         4G          0
94 *
95 *    (*)  AMD lists VPBLENDVB as related to SSE4.1 PBLENDVB, which may
96 *         explain why it is considered exception class 4.  However,
97 *         Intel says that VEX-only instructions should be in class 6...
98 *
99 *    (+)  Not found in Intel's table 2-16
100 *
101 *    (§)  4H and 4H-1 do not mention VEX.W requirements, which are
102 *         however present in the description of the instruction
103 *
104 *    (^)  these are the two cases in which Intel and AMD disagree on the
105 *         primary exception class
106 */
107
/* Zero initializer for one table slot.  Note that it carries its own trailing comma. */
#define X86_OP_NONE { 0 },

/*
 * X86_OP_GROUP3 builds an X86OpEntry initializer whose .decode member is
 * decode_<op> and whose .is_decode flag is true.  The three operand slots
 * are given as (type, size) pairs that are pasted onto the X86_TYPE_ and
 * X86_SIZE_ prefixes.  Any further arguments are appended verbatim as extra
 * designated initializers (see the attribute shorthands such as cpuid() or
 * vex4, each of which ends in a comma).
 */
#define X86_OP_GROUP3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .decode = glue(decode_, op),                                  \
    .op0 = glue(X86_TYPE_, op0_),                                 \
    .s0 = glue(X86_SIZE_, s0_),                                   \
    .op1 = glue(X86_TYPE_, op1_),                                 \
    .s1 = glue(X86_SIZE_, s1_),                                   \
    .op2 = glue(X86_TYPE_, op2_),                                 \
    .s2 = glue(X86_SIZE_, s2_),                                   \
    .is_decode = true,                                            \
    ## __VA_ARGS__                                                \
}

/*
 * Two-operand form: op1 gets the X86_TYPE_2op type with op0's size, and the
 * second (type, size) pair given by the caller lands in op2.
 */
#define X86_OP_GROUP2(op, op0, s0, op1, s1, ...)                  \
    X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
/* No operands: all three slots are None. */
#define X86_OP_GROUP0(op, ...)                                    \
    X86_OP_GROUP3(op, None, None, None, None, None, None, ## __VA_ARGS__)

/*
 * X86_OP_ENTRY3 is the counterpart of X86_OP_GROUP3 for final entries: it
 * sets .gen to gen_<op> instead of .decode, and does not set .is_decode.
 */
#define X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .gen = glue(gen_, op),                                        \
    .op0 = glue(X86_TYPE_, op0_),                                 \
    .s0 = glue(X86_SIZE_, s0_),                                   \
    .op1 = glue(X86_TYPE_, op1_),                                 \
    .s1 = glue(X86_SIZE_, s1_),                                   \
    .op2 = glue(X86_TYPE_, op2_),                                 \
    .s2 = glue(X86_SIZE_, s2_),                                   \
    ## __VA_ARGS__                                                \
}

/* Like X86_OP_ENTRY3, plus a fourth operand fixed to type I, size b. */
#define X86_OP_ENTRY4(op, op0_, s0_, op1_, s1_, op2_, s2_, ...)   \
    X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_,            \
        .op3 = X86_TYPE_I, .s3 = X86_SIZE_b,                      \
        ## __VA_ARGS__)

/* Two-operand form; same operand layout as X86_OP_GROUP2. */
#define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...)                  \
    X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
/* Single operand placed in op0; op1 and op2 are None. */
#define X86_OP_ENTRYw(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
/* Single operand placed in op2; op0 and op1 are None. */
#define X86_OP_ENTRYr(op, op0, s0, ...)                           \
    X86_OP_ENTRY3(op, None, None, None, None, op0, s0, ## __VA_ARGS__)
/* No operands at all. */
#define X86_OP_ENTRY0(op, ...)                                    \
    X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__)
151
/*
 * Attribute shorthands for the variadic tail of X86_OP_ENTRY and X86_OP_GROUP
 * macros.  Each one expands to one or more designated initializers followed
 * by a comma, which is why table entries list them separated only by spaces.
 *
 * Several of them assign the same member (.special or .vex_special).  When
 * two such shorthands are used in one entry, C's rule for designated
 * initializers applies: the later one overrides the earlier one.
 */

/* .cpuid: feature constant, pasted onto X86_FEAT_. */
#define cpuid(feat) .cpuid = X86_FEAT_##feat,
/* .special: one X86_SPECIAL_* value per entry. */
#define xchg .special = X86_SPECIAL_Locked,
#define lock .special = X86_SPECIAL_HasLock,
#define mmx .special = X86_SPECIAL_MMX,
#define op0_Rd .special = X86_SPECIAL_Op0_Rd,
#define op2_Ry .special = X86_SPECIAL_Op2_Ry,
#define avx_movx .special = X86_SPECIAL_AVXExtMov,
#define sextT0 .special = X86_SPECIAL_SExtT0,
#define zextT0 .special = X86_SPECIAL_ZExtT0,

/*
 * .vex_class: numeric exception class.  The _rep3, _rep5 and _unal variants
 * additionally set .vex_special.
 */
#define vex1 .vex_class = 1,
#define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar,
#define vex2 .vex_class = 2,
#define vex2_rep3 .vex_class = 2, .vex_special = X86_VEX_REPScalar,
#define vex3 .vex_class = 3,
#define vex4 .vex_class = 4,
#define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned,
#define vex4_rep5 .vex_class = 4, .vex_special = X86_VEX_REPScalar,
#define vex5 .vex_class = 5,
#define vex6 .vex_class = 6,
#define vex7 .vex_class = 7,
#define vex8 .vex_class = 8,
#define vex11 .vex_class = 11,
#define vex12 .vex_class = 12,
#define vex13 .vex_class = 13,

/* .check takes an X86_CHECK_* suffix, .intercept an SVM_EXIT_* suffix. */
#define chk(a) .check = X86_CHECK_##a,
#define svm(a) .intercept = SVM_EXIT_##a,

/* Sets .vex_special, so it overrides (or is overridden by) vex*_rep* and vex4_unal. */
#define avx2_256 .vex_special = X86_VEX_AVX2_256,
182
/*
 * Bit masks for the .valid_prefix member.  P_00 (no prefix) is bit 0; the
 * other three are 1 shifted left by the corresponding PREFIX_* constant.
 */
#define P_00          1
#define P_66          (1 << PREFIX_DATA)
#define P_F3          (1 << PREFIX_REPZ)
#define P_F2          (1 << PREFIX_REPNZ)

/*
 * Shorthands that set .valid_prefix to the OR of the masks named in the
 * macro, e.g. p_00_66 accepts "no prefix" and 66.  Like the other attribute
 * shorthands, each ends in a comma.
 */
#define p_00          .valid_prefix = P_00,
#define p_66          .valid_prefix = P_66,
#define p_f3          .valid_prefix = P_F3,
#define p_f2          .valid_prefix = P_F2,
#define p_00_66       .valid_prefix = P_00 | P_66,
#define p_00_f3       .valid_prefix = P_00 | P_F3,
#define p_66_f2       .valid_prefix = P_66 | P_F2,
#define p_00_66_f3    .valid_prefix = P_00 | P_66 | P_F3,
#define p_66_f3_f2    .valid_prefix = P_66 | P_F3 | P_F2,
#define p_00_66_f3_f2 .valid_prefix = P_00 | P_66 | P_F3 | P_F2,
198
199static uint8_t get_modrm(DisasContext *s, CPUX86State *env)
200{
201    if (!s->has_modrm) {
202        s->modrm = x86_ldub_code(env, s);
203        s->has_modrm = true;
204    }
205    return s->modrm;
206}
207
208static inline const X86OpEntry *decode_by_prefix(DisasContext *s, const X86OpEntry entries[4])
209{
210    if (s->prefix & PREFIX_REPNZ) {
211        return &entries[3];
212    } else if (s->prefix & PREFIX_REPZ) {
213        return &entries[2];
214    } else if (s->prefix & PREFIX_DATA) {
215        return &entries[1];
216    } else {
217        return &entries[0];
218    }
219}
220
221static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
222{
223    /* only includes ldmxcsr and stmxcsr, because they have AVX variants.  */
224    static const X86OpEntry group15_reg[8] = {
225    };
226
227    static const X86OpEntry group15_mem[8] = {
228        [2] = X86_OP_ENTRYr(LDMXCSR,    E,d, vex5 chk(VEX128)),
229        [3] = X86_OP_ENTRYw(STMXCSR,    E,d, vex5 chk(VEX128)),
230    };
231
232    uint8_t modrm = get_modrm(s, env);
233    if ((modrm >> 6) == 3) {
234        *entry = group15_reg[(modrm >> 3) & 7];
235    } else {
236        *entry = group15_mem[(modrm >> 3) & 7];
237    }
238}
239
240static void decode_group17(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
241{
242    static const X86GenFunc group17_gen[8] = {
243        NULL, gen_BLSR, gen_BLSMSK, gen_BLSI,
244    };
245    int op = (get_modrm(s, env) >> 3) & 7;
246    entry->gen = group17_gen[op];
247}
248
249static void decode_group12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
250{
251    static const X86OpEntry opcodes_group12[8] = {
252        {},
253        {},
254        X86_OP_ENTRY3(PSRLW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
255        {},
256        X86_OP_ENTRY3(PSRAW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
257        {},
258        X86_OP_ENTRY3(PSLLW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
259        {},
260    };
261
262    int op = (get_modrm(s, env) >> 3) & 7;
263    *entry = opcodes_group12[op];
264}
265
266static void decode_group13(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
267{
268    static const X86OpEntry opcodes_group13[8] = {
269        {},
270        {},
271        X86_OP_ENTRY3(PSRLD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
272        {},
273        X86_OP_ENTRY3(PSRAD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
274        {},
275        X86_OP_ENTRY3(PSLLD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
276        {},
277    };
278
279    int op = (get_modrm(s, env) >> 3) & 7;
280    *entry = opcodes_group13[op];
281}
282
283static void decode_group14(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
284{
285    static const X86OpEntry opcodes_group14[8] = {
286        /* grp14 */
287        {},
288        {},
289        X86_OP_ENTRY3(PSRLQ_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
290        X86_OP_ENTRY3(PSRLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
291        {},
292        {},
293        X86_OP_ENTRY3(PSLLQ_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
294        X86_OP_ENTRY3(PSLLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
295    };
296
297    int op = (get_modrm(s, env) >> 3) & 7;
298    *entry = opcodes_group14[op];
299}
300
301static void decode_0F6F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
302{
303    static const X86OpEntry opcodes_0F6F[4] = {
304        X86_OP_ENTRY3(MOVDQ,       P,q, None,None, Q,q, vex5 mmx),  /* movq */
305        X86_OP_ENTRY3(MOVDQ,       V,x, None,None, W,x, vex1),      /* movdqa */
306        X86_OP_ENTRY3(MOVDQ,       V,x, None,None, W,x, vex4_unal), /* movdqu */
307        {},
308    };
309    *entry = *decode_by_prefix(s, opcodes_0F6F);
310}
311
312static void decode_0F70(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
313{
314    static const X86OpEntry pshufw[4] = {
315        X86_OP_ENTRY3(PSHUFW,  P,q, Q,q, I,b, vex4 mmx),
316        X86_OP_ENTRY3(PSHUFD,  V,x, W,x, I,b, vex4 avx2_256),
317        X86_OP_ENTRY3(PSHUFHW, V,x, W,x, I,b, vex4 avx2_256),
318        X86_OP_ENTRY3(PSHUFLW, V,x, W,x, I,b, vex4 avx2_256),
319    };
320
321    *entry = *decode_by_prefix(s, pshufw);
322}
323
324static void decode_0F77(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
325{
326    if (!(s->prefix & PREFIX_VEX)) {
327        entry->gen = gen_EMMS;
328    } else if (!s->vex_l) {
329        entry->gen = gen_VZEROUPPER;
330        entry->vex_class = 8;
331    } else {
332        entry->gen = gen_VZEROALL;
333        entry->vex_class = 8;
334    }
335}
336
337static void decode_0F78(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
338{
339    static const X86OpEntry opcodes_0F78[4] = {
340        {},
341        X86_OP_ENTRY3(EXTRQ_i,       V,x, None,None, I,w,  cpuid(SSE4A)), /* AMD extension */
342        {},
343        X86_OP_ENTRY3(INSERTQ_i,     V,x, U,x, I,w,        cpuid(SSE4A)), /* AMD extension */
344    };
345    *entry = *decode_by_prefix(s, opcodes_0F78);
346}
347
348static void decode_0F79(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
349{
350    if (s->prefix & PREFIX_REPNZ) {
351        entry->gen = gen_INSERTQ_r; /* AMD extension */
352    } else if (s->prefix & PREFIX_DATA) {
353        entry->gen = gen_EXTRQ_r; /* AMD extension */
354    } else {
355        entry->gen = NULL;
356    };
357}
358
359static void decode_0F7E(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
360{
361    static const X86OpEntry opcodes_0F7E[4] = {
362        X86_OP_ENTRY3(MOVD_from,  E,y, None,None, P,y, vex5 mmx),
363        X86_OP_ENTRY3(MOVD_from,  E,y, None,None, V,y, vex5),
364        X86_OP_ENTRY3(MOVQ,       V,x, None,None, W,q, vex5),  /* wrong dest Vy on SDM! */
365        {},
366    };
367    *entry = *decode_by_prefix(s, opcodes_0F7E);
368}
369
370static void decode_0F7F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
371{
372    static const X86OpEntry opcodes_0F7F[4] = {
373        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex5 mmx), /* movq */
374        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex1), /* movdqa */
375        X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex4_unal), /* movdqu */
376        {},
377    };
378    *entry = *decode_by_prefix(s, opcodes_0F7F);
379}
380
381static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
382{
383    static const X86OpEntry movq[4] = {
384        {},
385        X86_OP_ENTRY3(MOVQ,    W,x,  None, None, V,q, vex5),
386        X86_OP_ENTRY3(MOVq_dq, V,dq, None, None, N,q),
387        X86_OP_ENTRY3(MOVq_dq, P,q,  None, None, U,q),
388    };
389
390    *entry = *decode_by_prefix(s, movq);
391}
392
393static const X86OpEntry opcodes_0F38_00toEF[240] = {
394    [0x00] = X86_OP_ENTRY3(PSHUFB,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
395    [0x01] = X86_OP_ENTRY3(PHADDW,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
396    [0x02] = X86_OP_ENTRY3(PHADDD,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
397    [0x03] = X86_OP_ENTRY3(PHADDSW,   V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
398    [0x04] = X86_OP_ENTRY3(PMADDUBSW, V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
399    [0x05] = X86_OP_ENTRY3(PHSUBW,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
400    [0x06] = X86_OP_ENTRY3(PHSUBD,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
401    [0x07] = X86_OP_ENTRY3(PHSUBSW,   V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
402
403    [0x10] = X86_OP_ENTRY2(PBLENDVB,  V,x,         W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
404    [0x13] = X86_OP_ENTRY2(VCVTPH2PS, V,x,         W,xh, vex11 chk(W0) cpuid(F16C) p_66),
405    [0x14] = X86_OP_ENTRY2(BLENDVPS,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
406    [0x15] = X86_OP_ENTRY2(BLENDVPD,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
407    /* Listed incorrectly as type 4 */
408    [0x16] = X86_OP_ENTRY3(VPERMD,    V,qq, H,qq,      W,qq,  vex6 chk(W0) cpuid(AVX2) p_66), /* vpermps */
409    [0x17] = X86_OP_ENTRY3(VPTEST,    None,None, V,x,  W,x,   vex4 cpuid(SSE41) p_66),
410
411    /*
412     * Source operand listed as Mq/Ux and similar in the manual; incorrectly listed
413     * as 128-bit only in 2-17.
414     */
415    [0x20] = X86_OP_ENTRY3(VPMOVSXBW, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
416    [0x21] = X86_OP_ENTRY3(VPMOVSXBD, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
417    [0x22] = X86_OP_ENTRY3(VPMOVSXBQ, V,x,  None,None, W,w,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
418    [0x23] = X86_OP_ENTRY3(VPMOVSXWD, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
419    [0x24] = X86_OP_ENTRY3(VPMOVSXWQ, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
420    [0x25] = X86_OP_ENTRY3(VPMOVSXDQ, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
421
422    /* Same as PMOVSX.  */
423    [0x30] = X86_OP_ENTRY3(VPMOVZXBW, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
424    [0x31] = X86_OP_ENTRY3(VPMOVZXBD, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
425    [0x32] = X86_OP_ENTRY3(VPMOVZXBQ, V,x,  None,None, W,w,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
426    [0x33] = X86_OP_ENTRY3(VPMOVZXWD, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
427    [0x34] = X86_OP_ENTRY3(VPMOVZXWQ, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
428    [0x35] = X86_OP_ENTRY3(VPMOVZXDQ, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
429    [0x36] = X86_OP_ENTRY3(VPERMD,    V,qq, H,qq,      W,qq,  vex6 chk(W0) cpuid(AVX2) p_66),
430    [0x37] = X86_OP_ENTRY3(PCMPGTQ,   V,x,  H,x,       W,x,   vex4 cpuid(SSE42) avx2_256 p_66),
431
432    [0x40] = X86_OP_ENTRY3(PMULLD,      V,x,  H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
433    [0x41] = X86_OP_ENTRY3(VPHMINPOSUW, V,dq, None,None, W,dq, vex4 cpuid(SSE41) p_66),
434    /* Listed incorrectly as type 4 */
435    [0x45] = X86_OP_ENTRY3(VPSRLV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),
436    [0x46] = X86_OP_ENTRY3(VPSRAV,      V,x,  H,x,       W,x,  vex6 chk(W0) cpuid(AVX2) p_66),
437    [0x47] = X86_OP_ENTRY3(VPSLLV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),
438
439    [0x90] = X86_OP_ENTRY3(VPGATHERD, V,x,  H,x,  M,d,  vex12 cpuid(AVX2) p_66), /* vpgatherdd/q */
440    [0x91] = X86_OP_ENTRY3(VPGATHERQ, V,x,  H,x,  M,q,  vex12 cpuid(AVX2) p_66), /* vpgatherqd/q */
441    [0x92] = X86_OP_ENTRY3(VPGATHERD, V,x,  H,x,  M,d,  vex12 cpuid(AVX2) p_66), /* vgatherdps/d */
442    [0x93] = X86_OP_ENTRY3(VPGATHERQ, V,x,  H,x,  M,q,  vex12 cpuid(AVX2) p_66), /* vgatherqps/d */
443
444    /* Should be exception type 2 but they do not have legacy SSE equivalents? */
445    [0x96] = X86_OP_ENTRY3(VFMADDSUB132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
446    [0x97] = X86_OP_ENTRY3(VFMSUBADD132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
447
448    [0xa6] = X86_OP_ENTRY3(VFMADDSUB213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
449    [0xa7] = X86_OP_ENTRY3(VFMSUBADD213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
450
451    [0xb6] = X86_OP_ENTRY3(VFMADDSUB231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
452    [0xb7] = X86_OP_ENTRY3(VFMSUBADD231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
453
454    [0x08] = X86_OP_ENTRY3(PSIGNB,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
455    [0x09] = X86_OP_ENTRY3(PSIGNW,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
456    [0x0a] = X86_OP_ENTRY3(PSIGND,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
457    [0x0b] = X86_OP_ENTRY3(PMULHRSW,  V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
458    /* Listed incorrectly as type 4 */
459    [0x0c] = X86_OP_ENTRY3(VPERMILPS, V,x,        H,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_00_66),
460    [0x0d] = X86_OP_ENTRY3(VPERMILPD, V,x,        H,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_66),
461    [0x0e] = X86_OP_ENTRY3(VTESTPS,   None,None,  V,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_66),
462    [0x0f] = X86_OP_ENTRY3(VTESTPD,   None,None,  V,x,  W,x,  vex6 chk(W0) cpuid(AVX) p_66),
463
464    [0x18] = X86_OP_ENTRY3(VPBROADCASTD,   V,x,  None,None, W,d,  vex6 chk(W0) cpuid(AVX) p_66), /* vbroadcastss */
465    [0x19] = X86_OP_ENTRY3(VPBROADCASTQ,   V,qq, None,None, W,q,  vex6 chk(W0) cpuid(AVX) p_66), /* vbroadcastsd */
466    [0x1a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 chk(W0) cpuid(AVX) p_66),
467    [0x1c] = X86_OP_ENTRY3(PABSB,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
468    [0x1d] = X86_OP_ENTRY3(PABSW,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
469    [0x1e] = X86_OP_ENTRY3(PABSD,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
470
471    [0x28] = X86_OP_ENTRY3(PMULDQ,        V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
472    [0x29] = X86_OP_ENTRY3(PCMPEQQ,       V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
473    [0x2a] = X86_OP_ENTRY3(MOVDQ,         V,x, None,None, WM,x, vex1 cpuid(SSE41) avx2_256 p_66), /* movntdqa */
474    [0x2b] = X86_OP_ENTRY3(VPACKUSDW,     V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
475    [0x2c] = X86_OP_ENTRY3(VMASKMOVPS,    V,x, H,x,       WM,x, vex6 chk(W0) cpuid(AVX) p_66),
476    [0x2d] = X86_OP_ENTRY3(VMASKMOVPD,    V,x, H,x,       WM,x, vex6 chk(W0) cpuid(AVX) p_66),
477    /* Incorrectly listed as Mx,Hx,Vx in the manual */
478    [0x2e] = X86_OP_ENTRY3(VMASKMOVPS_st, M,x, V,x,       H,x,  vex6 chk(W0) cpuid(AVX) p_66),
479    [0x2f] = X86_OP_ENTRY3(VMASKMOVPD_st, M,x, V,x,       H,x,  vex6 chk(W0) cpuid(AVX) p_66),
480
481    [0x38] = X86_OP_ENTRY3(PMINSB,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
482    [0x39] = X86_OP_ENTRY3(PMINSD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
483    [0x3a] = X86_OP_ENTRY3(PMINUW,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
484    [0x3b] = X86_OP_ENTRY3(PMINUD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
485    [0x3c] = X86_OP_ENTRY3(PMAXSB,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
486    [0x3d] = X86_OP_ENTRY3(PMAXSD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
487    [0x3e] = X86_OP_ENTRY3(PMAXUW,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
488    [0x3f] = X86_OP_ENTRY3(PMAXUD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
489
490    /* VPBROADCASTQ not listed as W0 in table 2-16 */
491    [0x58] = X86_OP_ENTRY3(VPBROADCASTD,   V,x,  None,None, W,d,  vex6 chk(W0) cpuid(AVX2) p_66),
492    [0x59] = X86_OP_ENTRY3(VPBROADCASTQ,   V,x,  None,None, W,q,  vex6 chk(W0) cpuid(AVX2) p_66),
493    [0x5a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 chk(W0) cpuid(AVX2) p_66),
494
495    [0x78] = X86_OP_ENTRY3(VPBROADCASTB,   V,x,  None,None, W,b,  vex6 chk(W0) cpuid(AVX2) p_66),
496    [0x79] = X86_OP_ENTRY3(VPBROADCASTW,   V,x,  None,None, W,w,  vex6 chk(W0) cpuid(AVX2) p_66),
497
498    [0x8c] = X86_OP_ENTRY3(VPMASKMOV,    V,x,  H,x, WM,x, vex6 cpuid(AVX2) p_66),
499    [0x8e] = X86_OP_ENTRY3(VPMASKMOV_st, M,x,  V,x, H,x,  vex6 cpuid(AVX2) p_66),
500
501    /* Should be exception type 2 or 3 but they do not have legacy SSE equivalents? */
502    [0x98] = X86_OP_ENTRY3(VFMADD132Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
503    [0x99] = X86_OP_ENTRY3(VFMADD132Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
504    [0x9a] = X86_OP_ENTRY3(VFMSUB132Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
505    [0x9b] = X86_OP_ENTRY3(VFMSUB132Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
506    [0x9c] = X86_OP_ENTRY3(VFNMADD132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
507    [0x9d] = X86_OP_ENTRY3(VFNMADD132Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
508    [0x9e] = X86_OP_ENTRY3(VFNMSUB132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
509    [0x9f] = X86_OP_ENTRY3(VFNMSUB132Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
510
511    [0xa8] = X86_OP_ENTRY3(VFMADD213Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
512    [0xa9] = X86_OP_ENTRY3(VFMADD213Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
513    [0xaa] = X86_OP_ENTRY3(VFMSUB213Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
514    [0xab] = X86_OP_ENTRY3(VFMSUB213Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
515    [0xac] = X86_OP_ENTRY3(VFNMADD213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
516    [0xad] = X86_OP_ENTRY3(VFNMADD213Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
517    [0xae] = X86_OP_ENTRY3(VFNMSUB213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
518    [0xaf] = X86_OP_ENTRY3(VFNMSUB213Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
519
520    [0xb8] = X86_OP_ENTRY3(VFMADD231Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
521    [0xb9] = X86_OP_ENTRY3(VFMADD231Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
522    [0xba] = X86_OP_ENTRY3(VFMSUB231Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
523    [0xbb] = X86_OP_ENTRY3(VFMSUB231Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
524    [0xbc] = X86_OP_ENTRY3(VFNMADD231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
525    [0xbd] = X86_OP_ENTRY3(VFNMADD231Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
526    [0xbe] = X86_OP_ENTRY3(VFNMSUB231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
527    [0xbf] = X86_OP_ENTRY3(VFNMSUB231Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
528
529    [0xc8] = X86_OP_ENTRY2(SHA1NEXTE,   V,dq, W,dq, cpuid(SHA_NI)),
530    [0xc9] = X86_OP_ENTRY2(SHA1MSG1,    V,dq, W,dq, cpuid(SHA_NI)),
531    [0xca] = X86_OP_ENTRY2(SHA1MSG2,    V,dq, W,dq, cpuid(SHA_NI)),
532    [0xcb] = X86_OP_ENTRY2(SHA256RNDS2, V,dq, W,dq, cpuid(SHA_NI)),
533    [0xcc] = X86_OP_ENTRY2(SHA256MSG1,  V,dq, W,dq, cpuid(SHA_NI)),
534    [0xcd] = X86_OP_ENTRY2(SHA256MSG2,  V,dq, W,dq, cpuid(SHA_NI)),
535
536    [0xdb] = X86_OP_ENTRY3(VAESIMC,     V,dq, None,None, W,dq, vex4 cpuid(AES) p_66),
537    [0xdc] = X86_OP_ENTRY3(VAESENC,     V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
538    [0xdd] = X86_OP_ENTRY3(VAESENCLAST, V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
539    [0xde] = X86_OP_ENTRY3(VAESDEC,     V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
540    [0xdf] = X86_OP_ENTRY3(VAESDECLAST, V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
541
542    /*
543     * REG selects srcdest2 operand, VEX.vvvv selects src3.  VEX class not found
544     * in manual, assumed to be 13 from the VEX.L0 constraint.
545     */
546    [0xe0] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
547    [0xe1] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
548    [0xe2] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
549    [0xe3] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
550    [0xe4] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
551    [0xe5] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
552    [0xe6] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
553    [0xe7] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
554
555    [0xe8] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
556    [0xe9] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
557    [0xea] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
558    [0xeb] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
559    [0xec] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
560    [0xed] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
561    [0xee] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
562    [0xef] = X86_OP_ENTRY3(CMPccXADD,   M,y, G,y, B,y,  vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
563};
564
565/* five rows for no prefix, 66, F3, F2, 66+F2  */
566static const X86OpEntry opcodes_0F38_F0toFF[16][5] = {
567    [0] = {
568        X86_OP_ENTRY3(MOVBE, G,y, M,y, None,None, cpuid(MOVBE)),
569        X86_OP_ENTRY3(MOVBE, G,w, M,w, None,None, cpuid(MOVBE)),
570        {},
571        X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
572        X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
573    },
574    [1] = {
575        X86_OP_ENTRY3(MOVBE, M,y, G,y, None,None, cpuid(MOVBE)),
576        X86_OP_ENTRY3(MOVBE, M,w, G,w, None,None, cpuid(MOVBE)),
577        {},
578        X86_OP_ENTRY2(CRC32, G,d, E,y, cpuid(SSE42)),
579        X86_OP_ENTRY2(CRC32, G,d, E,w, cpuid(SSE42)),
580    },
581    [2] = {
582        X86_OP_ENTRY3(ANDN, G,y, B,y, E,y, vex13 cpuid(BMI1)),
583        {},
584        {},
585        {},
586        {},
587    },
588    [3] = {
589        X86_OP_GROUP3(group17, B,y, E,y, None,None, vex13 cpuid(BMI1)),
590        {},
591        {},
592        {},
593        {},
594    },
595    [5] = {
596        X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI1)),
597        {},
598        X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)),
599        X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)),
600        {},
601    },
602    [6] = {
603        {},
604        X86_OP_ENTRY2(ADCX, G,y, E,y, cpuid(ADX)),
605        X86_OP_ENTRY2(ADOX, G,y, E,y, cpuid(ADX)),
606        X86_OP_ENTRY3(MULX, /* B,y, */ G,y, E,y, 2,y, vex13 cpuid(BMI2)),
607        {},
608    },
609    [7] = {
610        X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)),
611        X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
612        X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 sextT0 cpuid(BMI1)),
613        X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)),
614        {},
615    },
616};
617
618static void decode_0F38(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
619{
620    *b = x86_ldub_code(env, s);
621    if (*b < 0xf0) {
622        *entry = opcodes_0F38_00toEF[*b];
623    } else {
624        int row = 0;
625        if (s->prefix & PREFIX_REPZ) {
626            /* The REPZ (F3) prefix has priority over 66 */
627            row = 2;
628        } else {
629            row += s->prefix & PREFIX_REPNZ ? 3 : 0;
630            row += s->prefix & PREFIX_DATA ? 1 : 0;
631        }
632        *entry = opcodes_0F38_F0toFF[*b & 15][row];
633    }
634}
635
636static void decode_VINSERTPS(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
637{
638    static const X86OpEntry
639        vinsertps_reg = X86_OP_ENTRY4(VINSERTPS_r, V,dq, H,dq, U,dq, vex5 cpuid(SSE41) p_66),
640        vinsertps_mem = X86_OP_ENTRY4(VINSERTPS_m, V,dq, H,dq, M,d,  vex5 cpuid(SSE41) p_66);
641
642    int modrm = get_modrm(s, env);
643    *entry = (modrm >> 6) == 3 ? vinsertps_reg : vinsertps_mem;
644}
645
646static const X86OpEntry opcodes_0F3A[256] = {
647    /*
648     * These are VEX-only, but incorrectly listed in the manual as exception type 4.
649     * Also the "qq" instructions are sometimes omitted by Table 2-17, but are VEX256
650     * only.
651     */
652    [0x00] = X86_OP_ENTRY3(VPERMQ,      V,qq, W,qq, I,b,  vex6 chk(W1) cpuid(AVX2) p_66),
653    [0x01] = X86_OP_ENTRY3(VPERMQ,      V,qq, W,qq, I,b,  vex6 chk(W1) cpuid(AVX2) p_66), /* VPERMPD */
654    [0x02] = X86_OP_ENTRY4(VBLENDPS,    V,x,  H,x,  W,x,  vex6 chk(W0) cpuid(AVX2) p_66), /* VPBLENDD */
655    [0x04] = X86_OP_ENTRY3(VPERMILPS_i, V,x,  W,x,  I,b,  vex6 chk(W0) cpuid(AVX) p_66),
656    [0x05] = X86_OP_ENTRY3(VPERMILPD_i, V,x,  W,x,  I,b,  vex6 chk(W0) cpuid(AVX) p_66),
657    [0x06] = X86_OP_ENTRY4(VPERM2x128,  V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66),
658
659    [0x14] = X86_OP_ENTRY3(PEXTRB,     E,b,  V,dq, I,b,  vex5 cpuid(SSE41) op0_Rd p_66),
660    [0x15] = X86_OP_ENTRY3(PEXTRW,     E,w,  V,dq, I,b,  vex5 cpuid(SSE41) op0_Rd p_66),
661    [0x16] = X86_OP_ENTRY3(PEXTR,      E,y,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
662    [0x17] = X86_OP_ENTRY3(VEXTRACTPS, E,d,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
663    [0x1d] = X86_OP_ENTRY3(VCVTPS2PH,  W,xh, V,x,  I,b,  vex11 chk(W0) cpuid(F16C) p_66),
664
665    [0x20] = X86_OP_ENTRY4(PINSRB,     V,dq, H,dq, E,b,  vex5 cpuid(SSE41) op2_Ry p_66),
666    [0x21] = X86_OP_GROUP0(VINSERTPS),
667    [0x22] = X86_OP_ENTRY4(PINSR,      V,dq, H,dq, E,y,  vex5 cpuid(SSE41) p_66),
668
669    [0x40] = X86_OP_ENTRY4(VDDPS,      V,x,  H,x,  W,x,  vex2 cpuid(SSE41) p_66),
670    [0x41] = X86_OP_ENTRY4(VDDPD,      V,dq, H,dq, W,dq, vex2 cpuid(SSE41) p_66),
671    [0x42] = X86_OP_ENTRY4(VMPSADBW,   V,x,  H,x,  W,x,  vex2 cpuid(SSE41) avx2_256 p_66),
672    [0x44] = X86_OP_ENTRY4(PCLMULQDQ,  V,dq, H,dq, W,dq, vex4 cpuid(PCLMULQDQ) p_66),
673    [0x46] = X86_OP_ENTRY4(VPERM2x128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66),
674
675    [0x60] = X86_OP_ENTRY4(PCMPESTRM,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
676    [0x61] = X86_OP_ENTRY4(PCMPESTRI,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
677    [0x62] = X86_OP_ENTRY4(PCMPISTRM,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
678    [0x63] = X86_OP_ENTRY4(PCMPISTRI,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
679
680    [0x08] = X86_OP_ENTRY3(VROUNDPS,   V,x,  W,x,  I,b,  vex2 cpuid(SSE41) p_66),
681    [0x09] = X86_OP_ENTRY3(VROUNDPD,   V,x,  W,x,  I,b,  vex2 cpuid(SSE41) p_66),
682    /*
683     * Not listed as four operand in the manual.  Also writes and reads 128-bits
684     * from the first two operands due to the V operand picking higher entries of
685     * the H operand; the "Vss,Hss,Wss" description from the manual is incorrect.
686     * For other unary operations such as VSQRTSx this is hidden by the "REPScalar"
687     * value of vex_special, because the table lists the operand types of VSQRTPx.
688     */
689    [0x0a] = X86_OP_ENTRY4(VROUNDSS,   V,x,  H,x, W,ss, vex3 cpuid(SSE41) p_66),
690    [0x0b] = X86_OP_ENTRY4(VROUNDSD,   V,x,  H,x, W,sd, vex3 cpuid(SSE41) p_66),
691    [0x0c] = X86_OP_ENTRY4(VBLENDPS,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) p_66),
692    [0x0d] = X86_OP_ENTRY4(VBLENDPD,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) p_66),
693    [0x0e] = X86_OP_ENTRY4(VPBLENDW,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
694    [0x0f] = X86_OP_ENTRY4(PALIGNR,    V,x,  H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
695
696    [0x18] = X86_OP_ENTRY4(VINSERTx128,  V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66),
697    [0x19] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b,  vex6 chk(W0) cpuid(AVX) p_66),
698
699    [0x38] = X86_OP_ENTRY4(VINSERTx128,  V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66),
700    [0x39] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b,  vex6 chk(W0) cpuid(AVX2) p_66),
701
702    /* Listed incorrectly as type 4 */
703    [0x4a] = X86_OP_ENTRY4(VBLENDVPS, V,x,  H,x,  W,x,   vex6 chk(W0) cpuid(AVX) p_66),
704    [0x4b] = X86_OP_ENTRY4(VBLENDVPD, V,x,  H,x,  W,x,   vex6 chk(W0) cpuid(AVX) p_66),
705    [0x4c] = X86_OP_ENTRY4(VPBLENDVB, V,x,  H,x,  W,x,   vex6 chk(W0) cpuid(AVX) p_66 avx2_256),
706
707    [0xcc] = X86_OP_ENTRY3(SHA1RNDS4,  V,dq, W,dq, I,b,  cpuid(SHA_NI)),
708
709    [0xdf] = X86_OP_ENTRY3(VAESKEYGEN, V,dq, W,dq, I,b,  vex4 cpuid(AES) p_66),
710
711    [0xF0] = X86_OP_ENTRY3(RORX, G,y, E,y, I,b, vex13 cpuid(BMI2) p_f2),
712};
713
714static void decode_0F3A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
715{
716    *b = x86_ldub_code(env, s);
717    *entry = opcodes_0F3A[*b];
718}
719
720/*
721 * There are some mistakes in the operands in the manual, and the load/store/register
722 * cases are easiest to keep separate, so the entries for 10-17 follow simplicity and
723 * efficiency of implementation rather than copying what the manual says.
724 *
725 * In particular:
726 *
727 * 1) "VMOVSS m32, xmm1" and "VMOVSD m64, xmm1" do not support VEX.vvvv != 1111b,
728 * but this is not mentioned in the tables.
729 *
730 * 2) MOVHLPS, MOVHPS, MOVHPD, MOVLPD, MOVLPS read the high quadword of one of their
731 * operands, which must therefore be dq; MOVLPD and MOVLPS also write the high
732 * quadword of the V operand.
733 */
734static void decode_0F10(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
735{
736    static const X86OpEntry opcodes_0F10_reg[4] = {
737        X86_OP_ENTRY3(MOVDQ,   V,x,  None,None, W,x, vex4_unal), /* MOVUPS */
738        X86_OP_ENTRY3(MOVDQ,   V,x,  None,None, W,x, vex4_unal), /* MOVUPD */
739        X86_OP_ENTRY3(VMOVSS,  V,x,  H,x,       W,x, vex5),
740        X86_OP_ENTRY3(VMOVLPx, V,x,  H,x,       W,x, vex5), /* MOVSD */
741    };
742
743    static const X86OpEntry opcodes_0F10_mem[4] = {
744        X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x,  vex4_unal), /* MOVUPS */
745        X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x,  vex4_unal), /* MOVUPD */
746        X86_OP_ENTRY3(VMOVSS_ld,  V,x,  H,x,       M,ss, vex5),
747        X86_OP_ENTRY3(VMOVSD_ld,  V,x,  H,x,       M,sd, vex5),
748    };
749
750    if ((get_modrm(s, env) >> 6) == 3) {
751        *entry = *decode_by_prefix(s, opcodes_0F10_reg);
752    } else {
753        *entry = *decode_by_prefix(s, opcodes_0F10_mem);
754    }
755}
756
757static void decode_0F11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
758{
759    static const X86OpEntry opcodes_0F11_reg[4] = {
760        X86_OP_ENTRY3(MOVDQ,   W,x,  None,None, V,x, vex4), /* MOVUPS */
761        X86_OP_ENTRY3(MOVDQ,   W,x,  None,None, V,x, vex4), /* MOVUPD */
762        X86_OP_ENTRY3(VMOVSS,  W,x,  H,x,       V,x, vex5),
763        X86_OP_ENTRY3(VMOVLPx, W,x,  H,x,       V,q, vex5), /* MOVSD */
764    };
765
766    static const X86OpEntry opcodes_0F11_mem[4] = {
767        X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex4), /* MOVUPS */
768        X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex4), /* MOVUPD */
769        X86_OP_ENTRY3(VMOVSS_st,  M,ss, None,None, V,x, vex5),
770        X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex5), /* MOVSD */
771    };
772
773    if ((get_modrm(s, env) >> 6) == 3) {
774        *entry = *decode_by_prefix(s, opcodes_0F11_reg);
775    } else {
776        *entry = *decode_by_prefix(s, opcodes_0F11_mem);
777    }
778}
779
780static void decode_0F12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
781{
782    static const X86OpEntry opcodes_0F12_mem[4] = {
783        /*
784         * Use dq for operand for compatibility with gen_MOVSD and
785         * to allow VEX128 only.
786         */
787        X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVLPS */
788        X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVLPD */
789        X86_OP_ENTRY3(VMOVSLDUP,  V,x,  None,None, W,x, vex4 cpuid(SSE3)),
790        X86_OP_ENTRY3(VMOVDDUP,   V,x,  None,None, WM,q, vex5 cpuid(SSE3)), /* qq if VEX.256 */
791    };
792    static const X86OpEntry opcodes_0F12_reg[4] = {
793        X86_OP_ENTRY3(VMOVHLPS,  V,dq, H,dq,       U,dq, vex7),
794        X86_OP_ENTRY3(VMOVLPx,   W,x,  H,x,        U,q,  vex5), /* MOVLPD */
795        X86_OP_ENTRY3(VMOVSLDUP, V,x,  None,None,  U,x,  vex4 cpuid(SSE3)),
796        X86_OP_ENTRY3(VMOVDDUP,  V,x,  None,None,  U,x,  vex5 cpuid(SSE3)),
797    };
798
799    if ((get_modrm(s, env) >> 6) == 3) {
800        *entry = *decode_by_prefix(s, opcodes_0F12_reg);
801    } else {
802        *entry = *decode_by_prefix(s, opcodes_0F12_mem);
803        if ((s->prefix & PREFIX_REPNZ) && s->vex_l) {
804            entry->s2 = X86_SIZE_qq;
805        }
806    }
807}
808
809static void decode_0F16(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
810{
811    static const X86OpEntry opcodes_0F16_mem[4] = {
812        /*
813         * Operand 1 technically only reads the low 64 bits, but uses dq so that
814         * it is easier to check for op0 == op1 in an endianness-neutral manner.
815         */
816        X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVHPS */
817        X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq,      M,q, vex5), /* MOVHPD */
818        X86_OP_ENTRY3(VMOVSHDUP,  V,x,  None,None, W,x, vex4 cpuid(SSE3)),
819        {},
820    };
821    static const X86OpEntry opcodes_0F16_reg[4] = {
822        /* Same as above, operand 1 could be Hq if it wasn't for big-endian.  */
823        X86_OP_ENTRY3(VMOVLHPS,  V,dq, H,dq,      U,q, vex7),
824        X86_OP_ENTRY3(VMOVHPx,   V,x,  H,x,       U,x, vex5), /* MOVHPD */
825        X86_OP_ENTRY3(VMOVSHDUP, V,x,  None,None, U,x, vex4 cpuid(SSE3)),
826        {},
827    };
828
829    if ((get_modrm(s, env) >> 6) == 3) {
830        *entry = *decode_by_prefix(s, opcodes_0F16_reg);
831    } else {
832        *entry = *decode_by_prefix(s, opcodes_0F16_mem);
833    }
834}
835
836static void decode_0F2A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
837{
838    static const X86OpEntry opcodes_0F2A[4] = {
839        X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
840        X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
841        X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
842        X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
843    };
844    *entry = *decode_by_prefix(s, opcodes_0F2A);
845}
846
847static void decode_0F2B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
848{
849    static const X86OpEntry opcodes_0F2B[4] = {
850        X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex1), /* MOVNTPS */
851        X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex1), /* MOVNTPD */
852        /* AMD extensions */
853        X86_OP_ENTRY3(VMOVSS_st,  M,ss, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSS */
854        X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSD */
855    };
856
857    *entry = *decode_by_prefix(s, opcodes_0F2B);
858}
859
860static void decode_0F2C(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
861{
862    static const X86OpEntry opcodes_0F2C[4] = {
863        /* Listed as ps/pd in the manual, but CVTTPS2PI only reads 64-bit.  */
864        X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,q),
865        X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,dq),
866        X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,ss, vex3),
867        X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,sd, vex3),
868    };
869    *entry = *decode_by_prefix(s, opcodes_0F2C);
870}
871
872static void decode_0F2D(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
873{
874    static const X86OpEntry opcodes_0F2D[4] = {
875        /* Listed as ps/pd in the manual, but CVTPS2PI only reads 64-bit.  */
876        X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,q),
877        X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,dq),
878        X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,ss, vex3),
879        X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,sd, vex3),
880    };
881    *entry = *decode_by_prefix(s, opcodes_0F2D);
882}
883
884static void decode_VxCOMISx(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
885{
886    /*
887     * VUCOMISx and VCOMISx are different and use no-prefix and 0x66 for SS and SD
888     * respectively.  Scalar values usually are associated with 0xF2 and 0xF3, for
889     * which X86_VEX_REPScalar exists, but here it has to be decoded by hand.
890     */
891    entry->s1 = entry->s2 = (s->prefix & PREFIX_DATA ? X86_SIZE_sd : X86_SIZE_ss);
892    entry->gen = (*b == 0x2E ? gen_VUCOMI : gen_VCOMI);
893}
894
895static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
896{
897    if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))) {
898        entry->op1 = X86_TYPE_None;
899        entry->s1 = X86_SIZE_None;
900    }
901    switch (*b) {
902    case 0x51: entry->gen = gen_VSQRT; break;
903    case 0x52: entry->gen = gen_VRSQRT; break;
904    case 0x53: entry->gen = gen_VRCP; break;
905    }
906}
907
908static void decode_0F5A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
909{
910    static const X86OpEntry opcodes_0F5A[4] = {
911        X86_OP_ENTRY2(VCVTPS2PD,  V,x,       W,xh, vex2),      /* VCVTPS2PD */
912        X86_OP_ENTRY2(VCVTPD2PS,  V,x,       W,x,  vex2),      /* VCVTPD2PS */
913        X86_OP_ENTRY3(VCVTSS2SD,  V,x,  H,x, W,x,  vex2_rep3), /* VCVTSS2SD */
914        X86_OP_ENTRY3(VCVTSD2SS,  V,x,  H,x, W,x,  vex2_rep3), /* VCVTSD2SS */
915    };
916    *entry = *decode_by_prefix(s, opcodes_0F5A);
917}
918
919static void decode_0F5B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
920{
921    static const X86OpEntry opcodes_0F5B[4] = {
922        X86_OP_ENTRY2(VCVTDQ2PS,   V,x, W,x,      vex2),
923        X86_OP_ENTRY2(VCVTPS2DQ,   V,x, W,x,      vex2),
924        X86_OP_ENTRY2(VCVTTPS2DQ,  V,x, W,x,      vex2),
925        {},
926    };
927    *entry = *decode_by_prefix(s, opcodes_0F5B);
928}
929
930static void decode_0FE6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
931{
932    static const X86OpEntry opcodes_0FE6[4] = {
933        {},
934        X86_OP_ENTRY2(VCVTTPD2DQ,  V,x, W,x,      vex2),
935        X86_OP_ENTRY2(VCVTDQ2PD,   V,x, W,x,      vex5),
936        X86_OP_ENTRY2(VCVTPD2DQ,   V,x, W,x,      vex2),
937    };
938    *entry = *decode_by_prefix(s, opcodes_0FE6);
939}
940
941static const X86OpEntry opcodes_0F[256] = {
942    [0x0E] = X86_OP_ENTRY0(EMMS,                              cpuid(3DNOW)), /* femms */
943    /*
944     * 3DNow!'s opcode byte comes *after* modrm and displacements, making it
945     * more like an Ib operand.  Dispatch to the right helper in a single gen_*
946     * function.
947     */
948    [0x0F] = X86_OP_ENTRY3(3dnow,       P,q, Q,q, I,b,        cpuid(3DNOW)),
949
950    [0x10] = X86_OP_GROUP0(0F10),
951    [0x11] = X86_OP_GROUP0(0F11),
952    [0x12] = X86_OP_GROUP0(0F12),
953    [0x13] = X86_OP_ENTRY3(VMOVLPx_st,  M,q, None,None, V,q,  vex5 p_00_66),
954    [0x14] = X86_OP_ENTRY3(VUNPCKLPx,   V,x, H,x, W,x,        vex4 p_00_66),
955    [0x15] = X86_OP_ENTRY3(VUNPCKHPx,   V,x, H,x, W,x,        vex4 p_00_66),
956    [0x16] = X86_OP_GROUP0(0F16),
957    /* Incorrectly listed as Mq,Vq in the manual */
958    [0x17] = X86_OP_ENTRY3(VMOVHPx_st,  M,q, None,None, V,dq, vex5 p_00_66),
959
960    [0x50] = X86_OP_ENTRY3(MOVMSK,     G,y, None,None, U,x, vex7 p_00_66),
961    [0x51] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), /* sqrtps */
962    [0x52] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rsqrtps */
963    [0x53] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rcpps */
964    [0x54] = X86_OP_ENTRY3(PAND,       V,x, H,x, W,x,  vex4 p_00_66), /* vand */
965    [0x55] = X86_OP_ENTRY3(PANDN,      V,x, H,x, W,x,  vex4 p_00_66), /* vandn */
966    [0x56] = X86_OP_ENTRY3(POR,        V,x, H,x, W,x,  vex4 p_00_66), /* vor */
967    [0x57] = X86_OP_ENTRY3(PXOR,       V,x, H,x, W,x,  vex4 p_00_66), /* vxor */
968
969    [0x60] = X86_OP_ENTRY3(PUNPCKLBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
970    [0x61] = X86_OP_ENTRY3(PUNPCKLWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
971    [0x62] = X86_OP_ENTRY3(PUNPCKLDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
972    [0x63] = X86_OP_ENTRY3(PACKSSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
973    [0x64] = X86_OP_ENTRY3(PCMPGTB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
974    [0x65] = X86_OP_ENTRY3(PCMPGTW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
975    [0x66] = X86_OP_ENTRY3(PCMPGTD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
976    [0x67] = X86_OP_ENTRY3(PACKUSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
977
978    [0x70] = X86_OP_GROUP0(0F70),
979    [0x71] = X86_OP_GROUP0(group12),
980    [0x72] = X86_OP_GROUP0(group13),
981    [0x73] = X86_OP_GROUP0(group14),
982    [0x74] = X86_OP_ENTRY3(PCMPEQB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
983    [0x75] = X86_OP_ENTRY3(PCMPEQW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
984    [0x76] = X86_OP_ENTRY3(PCMPEQD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
985    [0x77] = X86_OP_GROUP0(0F77),
986
987    [0x28] = X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x, vex1 p_00_66), /* MOVAPS */
988    [0x29] = X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex1 p_00_66), /* MOVAPS */
989    [0x2A] = X86_OP_GROUP0(0F2A),
990    [0x2B] = X86_OP_GROUP0(0F2B),
991    [0x2C] = X86_OP_GROUP0(0F2C),
992    [0x2D] = X86_OP_GROUP0(0F2D),
993    [0x2E] = X86_OP_GROUP3(VxCOMISx,   None,None, V,x, W,x,  vex3 p_00_66), /* VUCOMISS/SD */
994    [0x2F] = X86_OP_GROUP3(VxCOMISx,   None,None, V,x, W,x,  vex3 p_00_66), /* VCOMISS/SD */
995
996    [0x38] = X86_OP_GROUP0(0F38),
997    [0x3a] = X86_OP_GROUP0(0F3A),
998
999    [0x58] = X86_OP_ENTRY3(VADD,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1000    [0x59] = X86_OP_ENTRY3(VMUL,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1001    [0x5a] = X86_OP_GROUP0(0F5A),
1002    [0x5b] = X86_OP_GROUP0(0F5B),
1003    [0x5c] = X86_OP_ENTRY3(VSUB,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1004    [0x5d] = X86_OP_ENTRY3(VMIN,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1005    [0x5e] = X86_OP_ENTRY3(VDIV,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1006    [0x5f] = X86_OP_ENTRY3(VMAX,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
1007
1008    [0x68] = X86_OP_ENTRY3(PUNPCKHBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1009    [0x69] = X86_OP_ENTRY3(PUNPCKHWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1010    [0x6a] = X86_OP_ENTRY3(PUNPCKHDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1011    [0x6b] = X86_OP_ENTRY3(PACKSSDW,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1012    [0x6c] = X86_OP_ENTRY3(PUNPCKLQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
1013    [0x6d] = X86_OP_ENTRY3(PUNPCKHQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
1014    [0x6e] = X86_OP_ENTRY3(MOVD_to,    V,x, None,None, E,y, vex5 mmx p_00_66),  /* wrong dest Vy on SDM! */
1015    [0x6f] = X86_OP_GROUP0(0F6F),
1016
1017    [0x78] = X86_OP_GROUP0(0F78),
1018    [0x79] = X86_OP_GROUP2(0F79,       V,x, U,x,       cpuid(SSE4A)),
1019    [0x7c] = X86_OP_ENTRY3(VHADD,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
1020    [0x7d] = X86_OP_ENTRY3(VHSUB,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
1021    [0x7e] = X86_OP_GROUP0(0F7E),
1022    [0x7f] = X86_OP_GROUP0(0F7F),
1023
1024    [0xae] = X86_OP_GROUP0(group15),
1025
1026    [0xc2] = X86_OP_ENTRY4(VCMP,       V,x, H,x, W,x,       vex2_rep3 p_00_66_f3_f2),
1027    [0xc4] = X86_OP_ENTRY4(PINSRW,     V,dq,H,dq,E,w,       vex5 mmx p_00_66),
1028    [0xc5] = X86_OP_ENTRY3(PEXTRW,     G,d, U,dq,I,b,       vex5 mmx p_00_66),
1029    [0xc6] = X86_OP_ENTRY4(VSHUF,      V,x, H,x, W,x,       vex4 p_00_66),
1030
1031    [0xd0] = X86_OP_ENTRY3(VADDSUB,   V,x, H,x, W,x,        vex2 cpuid(SSE3) p_66_f2),
1032    [0xd1] = X86_OP_ENTRY3(PSRLW_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1033    [0xd2] = X86_OP_ENTRY3(PSRLD_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1034    [0xd3] = X86_OP_ENTRY3(PSRLQ_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1035    [0xd4] = X86_OP_ENTRY3(PADDQ,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1036    [0xd5] = X86_OP_ENTRY3(PMULLW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1037    [0xd6] = X86_OP_GROUP0(0FD6),
1038    [0xd7] = X86_OP_ENTRY3(PMOVMSKB,  G,d, None,None, U,x,  vex7 mmx avx2_256 p_00_66),
1039
1040    [0xe0] = X86_OP_ENTRY3(PAVGB,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1041    [0xe1] = X86_OP_ENTRY3(PSRAW_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
1042    [0xe2] = X86_OP_ENTRY3(PSRAD_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
1043    [0xe3] = X86_OP_ENTRY3(PAVGW,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1044    [0xe4] = X86_OP_ENTRY3(PMULHUW,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1045    [0xe5] = X86_OP_ENTRY3(PMULHW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
1046    [0xe6] = X86_OP_GROUP0(0FE6),
1047    [0xe7] = X86_OP_ENTRY3(MOVDQ,     W,x, None,None, V,x,  vex1 mmx p_00_66), /* MOVNTQ/MOVNTDQ */
1048
1049    [0xf0] = X86_OP_ENTRY3(MOVDQ,    V,x, None,None, WM,x,  vex4_unal cpuid(SSE3) p_f2), /* LDDQU */
1050    [0xf1] = X86_OP_ENTRY3(PSLLW_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
1051    [0xf2] = X86_OP_ENTRY3(PSLLD_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
1052    [0xf3] = X86_OP_ENTRY3(PSLLQ_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
1053    [0xf4] = X86_OP_ENTRY3(PMULUDQ,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
1054    [0xf5] = X86_OP_ENTRY3(PMADDWD,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
1055    [0xf6] = X86_OP_ENTRY3(PSADBW,   V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
1056    [0xf7] = X86_OP_ENTRY3(MASKMOV,  None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66),
1057
1058    /* Incorrectly missing from 2-17 */
1059    [0xd8] = X86_OP_ENTRY3(PSUBUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1060    [0xd9] = X86_OP_ENTRY3(PSUBUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1061    [0xda] = X86_OP_ENTRY3(PMINUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1062    [0xdb] = X86_OP_ENTRY3(PAND,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1063    [0xdc] = X86_OP_ENTRY3(PADDUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1064    [0xdd] = X86_OP_ENTRY3(PADDUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1065    [0xde] = X86_OP_ENTRY3(PMAXUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1066    [0xdf] = X86_OP_ENTRY3(PANDN,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1067
1068    [0xe8] = X86_OP_ENTRY3(PSUBSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1069    [0xe9] = X86_OP_ENTRY3(PSUBSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1070    [0xea] = X86_OP_ENTRY3(PMINSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1071    [0xeb] = X86_OP_ENTRY3(POR,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1072    [0xec] = X86_OP_ENTRY3(PADDSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1073    [0xed] = X86_OP_ENTRY3(PADDSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1074    [0xee] = X86_OP_ENTRY3(PMAXSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1075    [0xef] = X86_OP_ENTRY3(PXOR,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1076
1077    [0xf8] = X86_OP_ENTRY3(PSUBB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1078    [0xf9] = X86_OP_ENTRY3(PSUBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1079    [0xfa] = X86_OP_ENTRY3(PSUBD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1080    [0xfb] = X86_OP_ENTRY3(PSUBQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1081    [0xfc] = X86_OP_ENTRY3(PADDB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1082    [0xfd] = X86_OP_ENTRY3(PADDW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1083    [0xfe] = X86_OP_ENTRY3(PADDD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
1084    /* 0xff = UD0 */
1085};
1086
1087static void do_decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1088{
1089    *entry = opcodes_0F[*b];
1090}
1091
1092static void decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1093{
1094    *b = x86_ldub_code(env, s);
1095    do_decode_0F(s, env, entry, b);
1096}
1097
1098static const X86OpEntry opcodes_root[256] = {
1099    [0x0F] = X86_OP_GROUP0(0F),
1100};
1101
/*
 * Shorthand macros used inside the opcode table entries above (MMX
 * marker and VEX exception classes); undefine them past this point.
 */
#undef mmx
#undef vex1
#undef vex2
#undef vex3
#undef vex4
#undef vex4_unal
#undef vex5
#undef vex6
#undef vex7
#undef vex8
#undef vex11
#undef vex12
#undef vex13
1115
1116/*
1117 * Decode the fixed part of the opcode and place the last
1118 * in b.
1119 */
1120static void decode_root(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1121{
1122    *entry = opcodes_root[*b];
1123}
1124
1125
1126static int decode_modrm(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
1127                        X86DecodedOp *op, X86OpType type)
1128{
1129    int modrm = get_modrm(s, env);
1130    if ((modrm >> 6) == 3) {
1131        op->n = (modrm & 7);
1132        if (type != X86_TYPE_Q && type != X86_TYPE_N) {
1133            op->n |= REX_B(s);
1134        }
1135    } else {
1136        op->has_ea = true;
1137        op->n = -1;
1138        decode->mem = gen_lea_modrm_0(env, s, get_modrm(s, env));
1139    }
1140    return modrm;
1141}
1142
1143static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp *ot)
1144{
1145    switch (size) {
1146    case X86_SIZE_b:  /* byte */
1147        *ot = MO_8;
1148        return true;
1149
1150    case X86_SIZE_d:  /* 32-bit */
1151    case X86_SIZE_ss: /* SSE/AVX scalar single precision */
1152        *ot = MO_32;
1153        return true;
1154
1155    case X86_SIZE_p:  /* Far pointer, return offset size */
1156    case X86_SIZE_s:  /* Descriptor, return offset size */
1157    case X86_SIZE_v:  /* 16/32/64-bit, based on operand size */
1158        *ot = s->dflag;
1159        return true;
1160
1161    case X86_SIZE_pi: /* MMX */
1162    case X86_SIZE_q:  /* 64-bit */
1163    case X86_SIZE_sd: /* SSE/AVX scalar double precision */
1164        *ot = MO_64;
1165        return true;
1166
1167    case X86_SIZE_w:  /* 16-bit */
1168        *ot = MO_16;
1169        return true;
1170
1171    case X86_SIZE_y:  /* 32/64-bit, based on operand size */
1172        *ot = s->dflag == MO_16 ? MO_32 : s->dflag;
1173        return true;
1174
1175    case X86_SIZE_z:  /* 16-bit for 16-bit operand size, else 32-bit */
1176        *ot = s->dflag == MO_16 ? MO_16 : MO_32;
1177        return true;
1178
1179    case X86_SIZE_dq: /* SSE/AVX 128-bit */
1180        if (e->special == X86_SPECIAL_MMX &&
1181            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1182            *ot = MO_64;
1183            return true;
1184        }
1185        if (s->vex_l && e->s0 != X86_SIZE_qq && e->s1 != X86_SIZE_qq) {
1186            return false;
1187        }
1188        *ot = MO_128;
1189        return true;
1190
1191    case X86_SIZE_qq: /* AVX 256-bit */
1192        if (!s->vex_l) {
1193            return false;
1194        }
1195        *ot = MO_256;
1196        return true;
1197
1198    case X86_SIZE_x:  /* 128/256-bit, based on operand size */
1199        if (e->special == X86_SPECIAL_MMX &&
1200            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1201            *ot = MO_64;
1202            return true;
1203        }
1204        /* fall through */
1205    case X86_SIZE_ps: /* SSE/AVX packed single precision */
1206    case X86_SIZE_pd: /* SSE/AVX packed double precision */
1207        *ot = s->vex_l ? MO_256 : MO_128;
1208        return true;
1209
1210    case X86_SIZE_xh: /* SSE/AVX packed half register */
1211        *ot = s->vex_l ? MO_128 : MO_64;
1212        return true;
1213
1214    case X86_SIZE_d64:  /* Default to 64-bit in 64-bit mode */
1215        *ot = CODE64(s) && s->dflag == MO_32 ? MO_64 : s->dflag;
1216        return true;
1217
1218    case X86_SIZE_f64:  /* Ignore size override prefix in 64-bit mode */
1219        *ot = CODE64(s) ? MO_64 : s->dflag;
1220        return true;
1221
1222    default:
1223        *ot = -1;
1224        return true;
1225    }
1226}
1227
1228static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
1229                      X86DecodedOp *op, X86OpType type, int b)
1230{
1231    int modrm;
1232
1233    switch (type) {
1234    case X86_TYPE_None:  /* Implicit or absent */
1235    case X86_TYPE_A:  /* Implicit */
1236    case X86_TYPE_F:  /* EFLAGS/RFLAGS */
1237    case X86_TYPE_X:  /* string source */
1238    case X86_TYPE_Y:  /* string destination */
1239        break;
1240
1241    case X86_TYPE_B:  /* VEX.vvvv selects a GPR */
1242        op->unit = X86_OP_INT;
1243        op->n = s->vex_v;
1244        break;
1245
1246    case X86_TYPE_C:  /* REG in the modrm byte selects a control register */
1247        op->unit = X86_OP_CR;
1248        goto get_reg;
1249
1250    case X86_TYPE_D:  /* REG in the modrm byte selects a debug register */
1251        op->unit = X86_OP_DR;
1252        goto get_reg;
1253
1254    case X86_TYPE_G:  /* REG in the modrm byte selects a GPR */
1255        op->unit = X86_OP_INT;
1256        goto get_reg;
1257
1258    case X86_TYPE_S:  /* reg selects a segment register */
1259        op->unit = X86_OP_SEG;
1260        goto get_reg;
1261
1262    case X86_TYPE_P:
1263        op->unit = X86_OP_MMX;
1264        goto get_reg;
1265
1266    case X86_TYPE_V:  /* reg in the modrm byte selects an XMM/YMM register */
1267        if (decode->e.special == X86_SPECIAL_MMX &&
1268            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1269            op->unit = X86_OP_MMX;
1270        } else {
1271            op->unit = X86_OP_SSE;
1272        }
1273    get_reg:
1274        op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
1275        break;
1276
1277    case X86_TYPE_E:  /* ALU modrm operand */
1278        op->unit = X86_OP_INT;
1279        goto get_modrm;
1280
1281    case X86_TYPE_Q:  /* MMX modrm operand */
1282        op->unit = X86_OP_MMX;
1283        goto get_modrm;
1284
1285    case X86_TYPE_W:  /* XMM/YMM modrm operand */
1286        if (decode->e.special == X86_SPECIAL_MMX &&
1287            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1288            op->unit = X86_OP_MMX;
1289        } else {
1290            op->unit = X86_OP_SSE;
1291        }
1292        goto get_modrm;
1293
1294    case X86_TYPE_N:  /* R/M in the modrm byte selects an MMX register */
1295        op->unit = X86_OP_MMX;
1296        goto get_modrm_reg;
1297
1298    case X86_TYPE_U:  /* R/M in the modrm byte selects an XMM/YMM register */
1299        if (decode->e.special == X86_SPECIAL_MMX &&
1300            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1301            op->unit = X86_OP_MMX;
1302        } else {
1303            op->unit = X86_OP_SSE;
1304        }
1305        goto get_modrm_reg;
1306
1307    case X86_TYPE_R:  /* R/M in the modrm byte selects a register */
1308        op->unit = X86_OP_INT;
1309    get_modrm_reg:
1310        modrm = get_modrm(s, env);
1311        if ((modrm >> 6) != 3) {
1312            return false;
1313        }
1314        goto get_modrm;
1315
1316    case X86_TYPE_WM:  /* modrm byte selects an XMM/YMM memory operand */
1317        op->unit = X86_OP_SSE;
1318        /* fall through */
1319    case X86_TYPE_M:  /* modrm byte selects a memory operand */
1320        modrm = get_modrm(s, env);
1321        if ((modrm >> 6) == 3) {
1322            return false;
1323        }
1324    get_modrm:
1325        decode_modrm(s, env, decode, op, type);
1326        break;
1327
1328    case X86_TYPE_O:  /* Absolute address encoded in the instruction */
1329        op->unit = X86_OP_INT;
1330        op->has_ea = true;
1331        op->n = -1;
1332        decode->mem = (AddressParts) {
1333            .def_seg = R_DS,
1334            .base = -1,
1335            .index = -1,
1336            .disp = insn_get_addr(env, s, s->aflag)
1337        };
1338        break;
1339
1340    case X86_TYPE_H:  /* For AVX, VEX.vvvv selects an XMM/YMM register */
1341        if ((s->prefix & PREFIX_VEX)) {
1342            op->unit = X86_OP_SSE;
1343            op->n = s->vex_v;
1344            break;
1345        }
1346        if (op == &decode->op[0]) {
1347            /* shifts place the destination in VEX.vvvv, use modrm */
1348            return decode_op(s, env, decode, op, decode->e.op1, b);
1349        } else {
1350            return decode_op(s, env, decode, op, decode->e.op0, b);
1351        }
1352
1353    case X86_TYPE_I:  /* Immediate */
1354    case X86_TYPE_J:  /* Relative offset for a jump */
1355        op->unit = X86_OP_IMM;
1356        decode->immediate = insn_get_signed(env, s, op->ot);
1357        break;
1358
1359    case X86_TYPE_L:  /* The upper 4 bits of the immediate select a 128-bit register */
1360        op->n = insn_get(env, s, op->ot) >> 4;
1361        break;
1362
1363    case X86_TYPE_2op:
1364        *op = decode->op[0];
1365        break;
1366
1367    case X86_TYPE_LoBits:
1368        op->n = (b & 7) | REX_B(s);
1369        op->unit = X86_OP_INT;
1370        break;
1371
1372    case X86_TYPE_0 ... X86_TYPE_7:
1373        op->n = type - X86_TYPE_0;
1374        op->unit = X86_OP_INT;
1375        break;
1376
1377    case X86_TYPE_ES ... X86_TYPE_GS:
1378        op->n = type - X86_TYPE_ES;
1379        op->unit = X86_OP_SEG;
1380        break;
1381    }
1382
1383    return true;
1384}
1385
1386static bool validate_sse_prefix(DisasContext *s, X86OpEntry *e)
1387{
1388    uint16_t sse_prefixes;
1389
1390    if (!e->valid_prefix) {
1391        return true;
1392    }
1393    if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
1394        /* In SSE instructions, 0xF3 and 0xF2 cancel 0x66.  */
1395        s->prefix &= ~PREFIX_DATA;
1396    }
1397
1398    /* Now, either zero or one bit is set in sse_prefixes.  */
1399    sse_prefixes = s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
1400    return e->valid_prefix & (1 << sse_prefixes);
1401}
1402
1403static bool decode_insn(DisasContext *s, CPUX86State *env, X86DecodeFunc decode_func,
1404                        X86DecodedInsn *decode)
1405{
1406    X86OpEntry *e = &decode->e;
1407
1408    decode_func(s, env, e, &decode->b);
1409    while (e->is_decode) {
1410        e->is_decode = false;
1411        e->decode(s, env, e, &decode->b);
1412    }
1413
1414    if (!validate_sse_prefix(s, e)) {
1415        return false;
1416    }
1417
1418    /* First compute size of operands in order to initialize s->rip_offset.  */
1419    if (e->op0 != X86_TYPE_None) {
1420        if (!decode_op_size(s, e, e->s0, &decode->op[0].ot)) {
1421            return false;
1422        }
1423        if (e->op0 == X86_TYPE_I) {
1424            s->rip_offset += 1 << decode->op[0].ot;
1425        }
1426    }
1427    if (e->op1 != X86_TYPE_None) {
1428        if (!decode_op_size(s, e, e->s1, &decode->op[1].ot)) {
1429            return false;
1430        }
1431        if (e->op1 == X86_TYPE_I) {
1432            s->rip_offset += 1 << decode->op[1].ot;
1433        }
1434    }
1435    if (e->op2 != X86_TYPE_None) {
1436        if (!decode_op_size(s, e, e->s2, &decode->op[2].ot)) {
1437            return false;
1438        }
1439        if (e->op2 == X86_TYPE_I) {
1440            s->rip_offset += 1 << decode->op[2].ot;
1441        }
1442    }
1443    if (e->op3 != X86_TYPE_None) {
1444        /*
1445         * A couple instructions actually use the extra immediate byte for an Lx
1446         * register operand; those are handled in the gen_* functions as one off.
1447         */
1448        assert(e->op3 == X86_TYPE_I && e->s3 == X86_SIZE_b);
1449        s->rip_offset += 1;
1450    }
1451
1452    if (e->op0 != X86_TYPE_None &&
1453        !decode_op(s, env, decode, &decode->op[0], e->op0, decode->b)) {
1454        return false;
1455    }
1456
1457    if (e->op1 != X86_TYPE_None &&
1458        !decode_op(s, env, decode, &decode->op[1], e->op1, decode->b)) {
1459        return false;
1460    }
1461
1462    if (e->op2 != X86_TYPE_None &&
1463        !decode_op(s, env, decode, &decode->op[2], e->op2, decode->b)) {
1464        return false;
1465    }
1466
1467    if (e->op3 != X86_TYPE_None) {
1468        decode->immediate = insn_get_signed(env, s, MO_8);
1469    }
1470
1471    return true;
1472}
1473
1474static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
1475{
1476    switch (cpuid) {
1477    case X86_FEAT_None:
1478        return true;
1479    case X86_FEAT_F16C:
1480        return (s->cpuid_ext_features & CPUID_EXT_F16C);
1481    case X86_FEAT_FMA:
1482        return (s->cpuid_ext_features & CPUID_EXT_FMA);
1483    case X86_FEAT_MOVBE:
1484        return (s->cpuid_ext_features & CPUID_EXT_MOVBE);
1485    case X86_FEAT_PCLMULQDQ:
1486        return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ);
1487    case X86_FEAT_SSE:
1488        return (s->cpuid_ext_features & CPUID_SSE);
1489    case X86_FEAT_SSE2:
1490        return (s->cpuid_ext_features & CPUID_SSE2);
1491    case X86_FEAT_SSE3:
1492        return (s->cpuid_ext_features & CPUID_EXT_SSE3);
1493    case X86_FEAT_SSSE3:
1494        return (s->cpuid_ext_features & CPUID_EXT_SSSE3);
1495    case X86_FEAT_SSE41:
1496        return (s->cpuid_ext_features & CPUID_EXT_SSE41);
1497    case X86_FEAT_SSE42:
1498        return (s->cpuid_ext_features & CPUID_EXT_SSE42);
1499    case X86_FEAT_AES:
1500        if (!(s->cpuid_ext_features & CPUID_EXT_AES)) {
1501            return false;
1502        } else if (!(s->prefix & PREFIX_VEX)) {
1503            return true;
1504        } else if (!(s->cpuid_ext_features & CPUID_EXT_AVX)) {
1505            return false;
1506        } else {
1507            return !s->vex_l || (s->cpuid_7_0_ecx_features & CPUID_7_0_ECX_VAES);
1508        }
1509
1510    case X86_FEAT_AVX:
1511        return (s->cpuid_ext_features & CPUID_EXT_AVX);
1512
1513    case X86_FEAT_3DNOW:
1514        return (s->cpuid_ext2_features & CPUID_EXT2_3DNOW);
1515    case X86_FEAT_SSE4A:
1516        return (s->cpuid_ext3_features & CPUID_EXT3_SSE4A);
1517
1518    case X86_FEAT_ADX:
1519        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX);
1520    case X86_FEAT_BMI1:
1521        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1);
1522    case X86_FEAT_BMI2:
1523        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2);
1524    case X86_FEAT_AVX2:
1525        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2);
1526    case X86_FEAT_SHA_NI:
1527        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SHA_NI);
1528
1529    case X86_FEAT_CMPCCXADD:
1530        return (s->cpuid_7_1_eax_features & CPUID_7_1_EAX_CMPCCXADD);
1531    }
1532    g_assert_not_reached();
1533}
1534
1535static bool validate_vex(DisasContext *s, X86DecodedInsn *decode)
1536{
1537    X86OpEntry *e = &decode->e;
1538
1539    switch (e->vex_special) {
1540    case X86_VEX_REPScalar:
1541        /*
1542         * Instructions which differ between 00/66 and F2/F3 in the
1543         * exception classification and the size of the memory operand.
1544         */
1545        assert(e->vex_class == 1 || e->vex_class == 2 || e->vex_class == 4);
1546        if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
1547            e->vex_class = e->vex_class < 4 ? 3 : 5;
1548            if (s->vex_l) {
1549                goto illegal;
1550            }
1551            assert(decode->e.s2 == X86_SIZE_x);
1552            if (decode->op[2].has_ea) {
1553                decode->op[2].ot = s->prefix & PREFIX_REPZ ? MO_32 : MO_64;
1554            }
1555        }
1556        break;
1557
1558    case X86_VEX_SSEUnaligned:
1559        /* handled in sse_needs_alignment.  */
1560        break;
1561
1562    case X86_VEX_AVX2_256:
1563        if ((s->prefix & PREFIX_VEX) && s->vex_l && !has_cpuid_feature(s, X86_FEAT_AVX2)) {
1564            goto illegal;
1565        }
1566    }
1567
1568    switch (e->vex_class) {
1569    case 0:
1570        if (s->prefix & PREFIX_VEX) {
1571            goto illegal;
1572        }
1573        return true;
1574    case 1:
1575    case 2:
1576    case 3:
1577    case 4:
1578    case 5:
1579    case 7:
1580        if (s->prefix & PREFIX_VEX) {
1581            if (!(s->flags & HF_AVX_EN_MASK)) {
1582                goto illegal;
1583            }
1584        } else if (e->special != X86_SPECIAL_MMX ||
1585                   (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
1586            if (!(s->flags & HF_OSFXSR_MASK)) {
1587                goto illegal;
1588            }
1589        }
1590        break;
1591    case 12:
1592        /* Must have a VSIB byte and no address prefix.  */
1593        assert(s->has_modrm);
1594        if ((s->modrm & 7) != 4 || s->aflag == MO_16) {
1595            goto illegal;
1596        }
1597
1598        /* Check no overlap between registers.  */
1599        if (!decode->op[0].has_ea &&
1600            (decode->op[0].n == decode->mem.index || decode->op[0].n == decode->op[1].n)) {
1601            goto illegal;
1602        }
1603        assert(!decode->op[1].has_ea);
1604        if (decode->op[1].n == decode->mem.index) {
1605            goto illegal;
1606        }
1607        if (!decode->op[2].has_ea &&
1608            (decode->op[2].n == decode->mem.index || decode->op[2].n == decode->op[1].n)) {
1609            goto illegal;
1610        }
1611        /* fall through */
1612    case 6:
1613    case 11:
1614        if (!(s->prefix & PREFIX_VEX)) {
1615            goto illegal;
1616        }
1617        if (!(s->flags & HF_AVX_EN_MASK)) {
1618            goto illegal;
1619        }
1620        break;
1621    case 8:
1622        /* Non-VEX case handled in decode_0F77.  */
1623        assert(s->prefix & PREFIX_VEX);
1624        if (!(s->flags & HF_AVX_EN_MASK)) {
1625            goto illegal;
1626        }
1627        break;
1628    case 13:
1629        if (!(s->prefix & PREFIX_VEX)) {
1630            goto illegal;
1631        }
1632        if (s->vex_l) {
1633            goto illegal;
1634        }
1635        /* All integer instructions use VEX.vvvv, so exit.  */
1636        return true;
1637    }
1638
1639    if (s->vex_v != 0 &&
1640        e->op0 != X86_TYPE_H && e->op0 != X86_TYPE_B &&
1641        e->op1 != X86_TYPE_H && e->op1 != X86_TYPE_B &&
1642        e->op2 != X86_TYPE_H && e->op2 != X86_TYPE_B) {
1643        goto illegal;
1644    }
1645
1646    if (s->flags & HF_TS_MASK) {
1647        goto nm_exception;
1648    }
1649    if (s->flags & HF_EM_MASK) {
1650        goto illegal;
1651    }
1652
1653    if (e->check) {
1654        if (e->check & X86_CHECK_VEX128) {
1655            if (s->vex_l) {
1656                goto illegal;
1657            }
1658        }
1659        if (e->check & X86_CHECK_W0) {
1660            if (s->vex_w) {
1661                goto illegal;
1662            }
1663        }
1664        if (e->check & X86_CHECK_W1) {
1665            if (!s->vex_w) {
1666                goto illegal;
1667            }
1668        }
1669    }
1670    return true;
1671
1672nm_exception:
1673    gen_NM_exception(s);
1674    return false;
1675illegal:
1676    gen_illegal_opcode(s);
1677    return false;
1678}
1679
1680/*
1681 * Convert one instruction. s->base.is_jmp is set if the translation must
1682 * be stopped.
 *
 * @b is the first byte of the instruction and is used as passed in; every
 * later byte is fetched with x86_ldub_code().  A value of 0x100 or more
 * selects do_decode_0F with opcode @b - 0x100 (NOTE(review): presumably the
 * caller has already consumed a 0x0F escape byte -- confirm against the
 * caller).
 *
 * The function proceeds in this order: collect prefixes, derive the data
 * and address sizes, decode the table entry and operands, run the checks
 * that end in gen_illegal_opcode(), then those that end in
 * gen_exception_gpf(), and finally load operands, call the entry's gen
 * function, write back the result and the condition code state.
 *
 * When an instruction is rejected, gen_illegal_opcode(), gen_unknown_opcode()
 * or gen_exception_gpf() is emitted instead; validate_vex() emits its own
 * exception before returning false.
1683 */
1684static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
1685{
1686    CPUX86State *env = cpu_env(cpu);
1687    bool first = true;
1688    X86DecodedInsn decode;
1689    X86DecodeFunc decode_func = decode_root;
1690    uint8_t cc_live;
1691
1692    s->has_modrm = false;
1693
1694 next_byte:
    /* The first byte comes from the caller; all others from the code stream. */
1695    if (first) {
1696        first = false;
1697    } else {
1698        b = x86_ldub_code(env, s);
1699    }
1700    /* Collect prefixes.  */
1701    switch (b) {
    /* F3 and F2 are mutually exclusive: the one seen last wins. */
1702    case 0xf3:
1703        s->prefix |= PREFIX_REPZ;
1704        s->prefix &= ~PREFIX_REPNZ;
1705        goto next_byte;
1706    case 0xf2:
1707        s->prefix |= PREFIX_REPNZ;
1708        s->prefix &= ~PREFIX_REPZ;
1709        goto next_byte;
1710    case 0xf0:
1711        s->prefix |= PREFIX_LOCK;
1712        goto next_byte;
    /* Segment overrides: a later override replaces an earlier one. */
1713    case 0x2e:
1714        s->override = R_CS;
1715        goto next_byte;
1716    case 0x36:
1717        s->override = R_SS;
1718        goto next_byte;
1719    case 0x3e:
1720        s->override = R_DS;
1721        goto next_byte;
1722    case 0x26:
1723        s->override = R_ES;
1724        goto next_byte;
1725    case 0x64:
1726        s->override = R_FS;
1727        goto next_byte;
1728    case 0x65:
1729        s->override = R_GS;
1730        goto next_byte;
1731    case 0x66:
1732        s->prefix |= PREFIX_DATA;
1733        goto next_byte;
1734    case 0x67:
1735        s->prefix |= PREFIX_ADR;
1736        goto next_byte;
1737#ifdef TARGET_X86_64
    /*
     * 0x40...0x4f are prefixes only in 64-bit code; otherwise the byte
     * leaves the prefix loop and is decoded as an opcode.
     */
1738    case 0x40 ... 0x4f:
1739        if (CODE64(s)) {
1740            /* REX prefix */
1741            s->prefix |= PREFIX_REX;
1742            s->vex_w = (b >> 3) & 1;
1743            s->rex_r = (b & 0x4) << 1;
1744            s->rex_x = (b & 0x2) << 2;
1745            s->rex_b = (b & 0x1) << 3;
1746            goto next_byte;
1747        }
1748        break;
1749#endif
1750    case 0xc5: /* 2-byte VEX */
1751    case 0xc4: /* 3-byte VEX */
1752        /*
1753         * VEX prefixes cannot be used except in 32-bit mode.
1754         * Otherwise the instruction is LES or LDS.
1755         */
1756        if (CODE32(s) && !VM86(s)) {
            /* Legacy prefix bit equivalent to each value of the VEX pp field. */
1757            static const int pp_prefix[4] = {
1758                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
1759            };
1760            int vex3, vex2 = x86_ldub_code(env, s);
1761
1762            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
1763                /*
1764                 * 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
1765                 * otherwise the instruction is LES or LDS.
1766                 */
1767                s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
1768                break;
1769            }
1770
1771            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
1772            if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ
1773                             | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
1774                goto illegal_op;
1775            }
            /* R, X, B and vvvv are taken from the complemented prefix bytes. */
1776#ifdef TARGET_X86_64
1777            s->rex_r = (~vex2 >> 4) & 8;
1778#endif
1779            if (b == 0xc5) {
1780                /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
1781                vex3 = vex2;
1782                decode_func = decode_0F;
1783            } else {
1784                /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
1785                vex3 = x86_ldub_code(env, s);
1786#ifdef TARGET_X86_64
1787                s->rex_x = (~vex2 >> 3) & 8;
1788                s->rex_b = (~vex2 >> 2) & 8;
1789#endif
1790                s->vex_w = (vex3 >> 7) & 1;
                /* The mmmmm field selects the opcode map. */
1791                switch (vex2 & 0x1f) {
1792                case 0x01: /* Implied 0f leading opcode bytes.  */
1793                    decode_func = decode_0F;
1794                    break;
1795                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
1796                    decode_func = decode_0F38;
1797                    break;
1798                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
1799                    decode_func = decode_0F3A;
1800                    break;
1801                default:   /* Reserved for future use.  */
1802                    goto unknown_op;
1803                }
1804            }
1805            s->vex_v = (~vex3 >> 3) & 0xf;
1806            s->vex_l = (vex3 >> 2) & 1;
1807            s->prefix |= pp_prefix[vex3 & 3] | PREFIX_VEX;
1808        }
1809        break;
1810    default:
        /* Values of 0x100 and above are decoded by do_decode_0F. */
1811        if (b >= 0x100) {
1812            b -= 0x100;
1813            decode_func = do_decode_0F;
1814        }
1815        break;
1816    }
1817
1818    /* Post-process prefixes.  */
1819    if (CODE64(s)) {
1820        /*
1821         * In 64-bit mode, the default data size is 32-bit.  Select 64-bit
1822         * data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
1823         * over 0x66 if both are present.
1824         */
1825        s->dflag = (REX_W(s) ? MO_64 : s->prefix & PREFIX_DATA ? MO_16 : MO_32);
1826        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
1827        s->aflag = (s->prefix & PREFIX_ADR ? MO_32 : MO_64);
1828    } else {
1829        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
1830        if (CODE32(s) ^ ((s->prefix & PREFIX_DATA) != 0)) {
1831            s->dflag = MO_32;
1832        } else {
1833            s->dflag = MO_16;
1834        }
1835        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
1836        if (CODE32(s) ^ ((s->prefix & PREFIX_ADR) != 0)) {
1837            s->aflag = MO_32;
1838        }  else {
1839            s->aflag = MO_16;
1840        }
1841    }
1842
    /*
     * cc_op == -1 marks "no flags writeback requested"; it is tested after
     * the gen function has run.
     */
1843    memset(&decode, 0, sizeof(decode));
1844    decode.cc_op = -1;
1845    decode.b = b;
1846    if (!decode_insn(s, env, decode_func, &decode)) {
1847        goto illegal_op;
1848    }
    /* An entry without a gen function is reported as an unknown opcode. */
1849    if (!decode.e.gen) {
1850        goto unknown_op;
1851    }
1852
1853    if (!has_cpuid_feature(s, decode.e.cpuid)) {
1854        goto illegal_op;
1855    }
1856
1857    /* Checks that result in #UD come first.  */
1858    if (decode.e.check) {
1859        if (decode.e.check & X86_CHECK_i64) {
1860            if (CODE64(s)) {
1861                goto illegal_op;
1862            }
1863        }
1864        if (decode.e.check & X86_CHECK_o64) {
1865            if (!CODE64(s)) {
1866                goto illegal_op;
1867            }
1868        }
1869        if (decode.e.check & X86_CHECK_prot) {
1870            if (!PE(s) || VM86(s)) {
1871                goto illegal_op;
1872            }
1873        }
1874    }
1875
1876    switch (decode.e.special) {
1877    case X86_SPECIAL_None:
1878        break;
1879
    /*
     * Locked entries behave as if LOCK had been given whenever operand 0
     * is in memory; they are then treated like HasLock entries so that the
     * LOCK legality check below accepts them.
     */
1880    case X86_SPECIAL_Locked:
1881        if (decode.op[0].has_ea) {
1882            s->prefix |= PREFIX_LOCK;
1883        }
1884        decode.e.special = X86_SPECIAL_HasLock;
1885        /* fallthrough */
1886    case X86_SPECIAL_HasLock:
1887        break;
1888
    /* A register (non-memory) operand 0 is forced to 32 bits. */
1889    case X86_SPECIAL_Op0_Rd:
1890        assert(decode.op[0].unit == X86_OP_INT);
1891        if (!decode.op[0].has_ea) {
1892            decode.op[0].ot = MO_32;
1893        }
1894        break;
1895
    /* A register operand 2 uses the data size, but never less than 32 bits. */
1896    case X86_SPECIAL_Op2_Ry:
1897        assert(decode.op[2].unit == X86_OP_INT);
1898        if (!decode.op[2].has_ea) {
1899            decode.op[2].ot = s->dflag == MO_16 ? MO_32 : s->dflag;
1900        }
1901        break;
1902
    /*
     * A register operand 2 is a full 128/256-bit vector; a memory operand 2
     * keeps its decoded size, bumped by one step when VEX.L is set.
     */
1903    case X86_SPECIAL_AVXExtMov:
1904        if (!decode.op[2].has_ea) {
1905            decode.op[2].ot = s->vex_l ? MO_256 : MO_128;
1906        } else if (s->vex_l) {
1907            decode.op[2].ot++;
1908        }
1909        break;
1910
1911    case X86_SPECIAL_SExtT0:
1912    case X86_SPECIAL_ZExtT0:
1913        /* Handled in gen_load.  */
1914        assert(decode.op[1].unit == X86_OP_INT);
1915        break;
1916
1917    default:
1918        break;
1919    }
1920
    /* LOCK is accepted only on HasLock entries whose operand 0 is in memory. */
1921    if (s->prefix & PREFIX_LOCK) {
1922        if (decode.e.special != X86_SPECIAL_HasLock || !decode.op[0].has_ea) {
1923            goto illegal_op;
1924        }
1925    }
1926
    /* On failure validate_vex() has already generated the exception. */
1927    if (!validate_vex(s, &decode)) {
1928        return;
1929    }
1930
1931    /*
1932     * Checks that result in #GP or VMEXIT come second.  Intercepts are
1933     * generally checked after non-memory exceptions (i.e. before all
1934     * exceptions if there is no memory operand).  Exceptions are
1935     * vm86 checks (INTn, IRET, PUSHF/POPF), RSM and XSETBV (!).
1936     *
1937     * RSM and XSETBV will be handled in the gen_* functions
1938     * instead of using chk().
1939     */
1940    if (decode.e.check & X86_CHECK_cpl0) {
1941        if (CPL(s) != 0) {
1942            goto gp_fault;
1943        }
1944    }
1945    if (decode.e.intercept && unlikely(GUEST(s))) {
1946        gen_helper_svm_check_intercept(tcg_env,
1947                                       tcg_constant_i32(decode.e.intercept));
1948    }
1949    if (decode.e.check) {
1950        if ((decode.e.check & X86_CHECK_vm86_iopl) && VM86(s)) {
1951            if (IOPL(s) < 3) {
1952                goto gp_fault;
1953            }
1954        } else if (decode.e.check & X86_CHECK_cpl_iopl) {
1955            if (IOPL(s) < CPL(s)) {
1956                goto gp_fault;
1957            }
1958        }
1959    }
1960
    /* Only the unprefixed form of an MMX entry calls the enter_mmx helper. */
1961    if (decode.e.special == X86_SPECIAL_MMX &&
1962        !(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
1963        gen_helper_enter_mmx(tcg_env);
1964    }
1965
    /*
     * The effective address is computed once for whichever operand is in
     * memory.  vex_class 12 is the class for which validate_vex() requires
     * a VSIB byte.
     */
1966    if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea) {
1967        gen_load_ea(s, &decode.mem, decode.e.vex_class == 12);
1968    }
    /*
     * With LOCK, only operand 2 is loaded and no writeback is done here.
     * NOTE(review): presumably the gen function performs the whole
     * read-modify-write on operand 0 itself -- confirm.
     */
1969    if (s->prefix & PREFIX_LOCK) {
1970        gen_load(s, &decode, 2, s->T1);
1971        decode.e.gen(s, env, &decode);
1972    } else {
        /* Resolve a vector destination register before the sources are loaded. */
1973        if (decode.op[0].unit == X86_OP_MMX) {
1974            compute_mmx_offset(&decode.op[0]);
1975        } else if (decode.op[0].unit == X86_OP_SSE) {
1976            compute_xmm_offset(&decode.op[0]);
1977        }
1978        gen_load(s, &decode, 1, s->T0);
1979        gen_load(s, &decode, 2, s->T1);
1980        decode.e.gen(s, env, &decode);
1981        gen_writeback(s, &decode, 0, s->T0);
1982    }
1983
1984    /*
1985     * Write back flags after last memory access.  Some newer ALU instructions, as
1986     * well as SSE instructions, write flags in the gen_* function, but that can
1987     * cause incorrect tracking of CC_OP for instructions that write to both memory
1988     * and flags.
1989     */
1990    if (decode.cc_op != -1) {
1991        if (decode.cc_dst) {
1992            tcg_gen_mov_tl(cpu_cc_dst, decode.cc_dst);
1993        }
1994        if (decode.cc_src) {
1995            tcg_gen_mov_tl(cpu_cc_src, decode.cc_src);
1996        }
1997        if (decode.cc_src2) {
1998            tcg_gen_mov_tl(cpu_cc_src2, decode.cc_src2);
1999        }
2000        if (decode.cc_op == CC_OP_DYNAMIC) {
2001            tcg_gen_mov_i32(cpu_cc_op, decode.cc_op_dynamic);
2002        }
2003        set_cc_op(s, decode.cc_op);
2004        cc_live = cc_op_live[decode.cc_op];
2005    } else {
2006        cc_live = 0;
2007    }
    /*
     * Consistency check on the gen function: unless cc_op is dynamic, it
     * must have supplied exactly the cc_dst/cc_src/cc_src2 values that
     * cc_op_live[] lists for the chosen cc_op (none when no writeback was
     * requested).
     */
2008    if (decode.cc_op != CC_OP_DYNAMIC) {
2009        assert(!decode.cc_op_dynamic);
2010        assert(!!decode.cc_dst == !!(cc_live & USES_CC_DST));
2011        assert(!!decode.cc_src == !!(cc_live & USES_CC_SRC));
2012        assert(!!decode.cc_src2 == !!(cc_live & USES_CC_SRC2));
2013    }
2014
2015    return;
2016 gp_fault:
2017    gen_exception_gpf(s);
2018    return;
2019 illegal_op:
2020    gen_illegal_opcode(s);
2021    return;
2022 unknown_op:
2023    gen_unknown_opcode(env, s);
2024}
2025