/* xref: /qemu/target/i386/tcg/translate.c (revision 76eb88b1) */
/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "exec/cpu_ldst.h"
#include "exec/translator.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "helper-tcg.h"

#include "exec/log.h"

#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20
#define PREFIX_REX    0x40

#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7

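/*
 * Worked example (annotation, not in the original source): a ModRM byte is
 * mod(2) | op(3) | rm(3).  CASE_MODRM_MEM_OP(7) expands to the case ranges
 * 0x38..0x3f, 0x78..0x7f and 0xb8..0xbf -- every encoding with op == 7 and
 * mod != 3, i.e. memory operands only -- while CASE_MODRM_OP(7) also covers
 * 0xf8..0xff (mod == 3, register operands).
 */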
//#define MACRO_TEST   1

/* global register indexes */
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
static TCGv cpu_seg_base[6];
static TCGv_i64 cpu_bndl[4];
static TCGv_i64 cpu_bndu[4];

#include "exec/gen-icount.h"

typedef struct DisasContext {
    DisasContextBase base;

    target_ulong pc;       /* pc = eip + cs_base */
    target_ulong pc_start; /* pc at TB entry */
    target_ulong cs_base;  /* base of CS segment */

    MemOp aflag;
    MemOp dflag;

    int8_t override; /* -1 if no override, else R_CS, R_DS, etc */
    uint8_t prefix;

#ifndef CONFIG_USER_ONLY
    uint8_t cpl;   /* code priv level */
    uint8_t iopl;  /* i/o priv level */
#endif
    uint8_t vex_l;  /* vex vector length */
    uint8_t vex_v;  /* vex vvvv register, without 1's complement.  */
    uint8_t popl_esp_hack; /* for correct popl with esp base handling */
    uint8_t rip_offset; /* only used in x86_64, but left for simplicity */

#ifdef TARGET_X86_64
    uint8_t rex_r;
    uint8_t rex_x;
    uint8_t rex_b;
    bool rex_w;
#endif
    bool jmp_opt; /* use direct block chaining for direct jumps */
    bool repz_opt; /* optimize jumps within repz instructions */
    bool cc_op_dirty;

    CCOp cc_op;  /* current CC operation */
    int mem_index; /* select memory access functions */
    uint32_t flags; /* all execution flags */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;

    /* TCG local temps */
    TCGv cc_srcT;
    TCGv A0;
    TCGv T0;
    TCGv T1;

    /* TCG local register indexes (only used inside old micro ops) */
    TCGv tmp0;
    TCGv tmp4;
    TCGv_ptr ptr0;
    TCGv_ptr ptr1;
    TCGv_i32 tmp2_i32;
    TCGv_i32 tmp3_i32;
    TCGv_i64 tmp1_i64;

    sigjmp_buf jmpbuf;
    TCGOp *prev_insn_end;
} DisasContext;

/* The environment in which user-only runs is constrained. */
#ifdef CONFIG_USER_ONLY
#define PE(S)     true
#define CPL(S)    3
#define IOPL(S)   0
#define SVME(S)   false
#define GUEST(S)  false
#else
#define PE(S)     (((S)->flags & HF_PE_MASK) != 0)
#define CPL(S)    ((S)->cpl)
#define IOPL(S)   ((S)->iopl)
#define SVME(S)   (((S)->flags & HF_SVME_MASK) != 0)
#define GUEST(S)  (((S)->flags & HF_GUEST_MASK) != 0)
#endif
#if defined(CONFIG_USER_ONLY) && defined(TARGET_X86_64)
#define VM86(S)   false
#define CODE32(S) true
#define SS32(S)   true
#define ADDSEG(S) false
#else
#define VM86(S)   (((S)->flags & HF_VM_MASK) != 0)
#define CODE32(S) (((S)->flags & HF_CS32_MASK) != 0)
#define SS32(S)   (((S)->flags & HF_SS32_MASK) != 0)
#define ADDSEG(S) (((S)->flags & HF_ADDSEG_MASK) != 0)
#endif
#if !defined(TARGET_X86_64)
#define CODE64(S) false
#define LMA(S)    false
#elif defined(CONFIG_USER_ONLY)
#define CODE64(S) true
#define LMA(S)    true
#else
#define CODE64(S) (((S)->flags & HF_CS64_MASK) != 0)
#define LMA(S)    (((S)->flags & HF_LMA_MASK) != 0)
#endif

#ifdef TARGET_X86_64
#define REX_PREFIX(S)  (((S)->prefix & PREFIX_REX) != 0)
#define REX_W(S)       ((S)->rex_w)
#define REX_R(S)       ((S)->rex_r + 0)
#define REX_X(S)       ((S)->rex_x + 0)
#define REX_B(S)       ((S)->rex_b + 0)
#else
#define REX_PREFIX(S)  false
#define REX_W(S)       false
#define REX_R(S)       0
#define REX_X(S)       0
#define REX_B(S)       0
#endif
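/*
 * Annotation (not part of the original source): REX.R/X/B supply the fourth
 * register-number bit in 64-bit mode.  The decoder keeps rex_r, rex_x and
 * rex_b pre-shifted (0 or 8) so they can be ORed directly into the 3-bit
 * modrm/sib fields -- e.g. a modrm reg field of 1 with REX.R set selects R9
 * rather than RCX.  Without a REX prefix all three read as 0.
 */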

/*
 * Many sysemu-only helpers are not reachable for user-only.
 * Define stub generators here, so that we need not either sprinkle
 * ifdefs through the translator, nor provide the helper function.
 */
#define STUB_HELPER(NAME, ...) \
    static inline void gen_helper_##NAME(__VA_ARGS__) \
    { qemu_build_not_reached(); }

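/*
 * For example, STUB_HELPER(clgi, TCGv_env env) below expands to
 *
 *     static inline void gen_helper_clgi(TCGv_env env)
 *     { qemu_build_not_reached(); }
 *
 * which satisfies the translator's references while letting the compiler
 * prove at build time that the call is never reached in a user-only build.
 */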
#ifdef CONFIG_USER_ONLY
STUB_HELPER(clgi, TCGv_env env)
STUB_HELPER(flush_page, TCGv_env env, TCGv addr)
STUB_HELPER(hlt, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(inb, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inw, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inl, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(monitor, TCGv_env env, TCGv addr)
STUB_HELPER(mwait, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(outb, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outw, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outl, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(rdmsr, TCGv_env env)
STUB_HELPER(read_crN, TCGv ret, TCGv_env env, TCGv_i32 reg)
STUB_HELPER(get_dr, TCGv ret, TCGv_env env, TCGv_i32 reg)
STUB_HELPER(set_dr, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(stgi, TCGv_env env)
STUB_HELPER(svm_check_intercept, TCGv_env env, TCGv_i32 type)
STUB_HELPER(vmload, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(vmmcall, TCGv_env env)
STUB_HELPER(vmrun, TCGv_env env, TCGv_i32 aflag, TCGv_i32 pc_ofs)
STUB_HELPER(vmsave, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(write_crN, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(wrmsr, TCGv_env env)
#endif

static void gen_eob(DisasContext *s);
static void gen_jr(DisasContext *s, TCGv dest);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, MemOp ot, int d);
static void gen_exception_gpf(DisasContext *s);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};

/* Bit set if the global variable is live after setting CC_OP to X.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
    [CC_OP_POPCNT] = USES_CC_SRC,
};

static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(s->cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}
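/*
 * Worked example (annotation): moving from CC_OP_ADDB (live: DST|SRC) to
 * CC_OP_LOGICB (live: DST) gives dead == USES_CC_SRC, so only cpu_cc_src
 * is discarded; cpu_cc_dst stays live because the new CC_OP still reads it.
 */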

static void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op_dirty) {
        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
        s->cc_op_dirty = false;
    }
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if HOST_BIG_ENDIAN
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

/* In instruction encodings for byte register accesses the
 * register number usually indicates "low 8 bits of register N";
 * however there are some special cases where N 4..7 indicates
 * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
 * true for this special case, false otherwise.
 */
static inline bool byte_reg_is_xH(DisasContext *s, int reg)
{
    /* Any time the REX prefix is present, byte registers are uniform */
    if (reg < 4 || REX_PREFIX(s)) {
        return false;
    }
    return true;
}
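/*
 * Concrete example (annotation): "mov %ah, ..." encodes reg == 4 with no
 * REX prefix, so byte_reg_is_xH() returns true and bits 15..8 of EAX are
 * meant; the same reg == 4 under any REX prefix instead names SPL, the
 * low byte of RSP, and the function returns false.
 */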

/* Select the size of a push/pop operation.  */
static inline MemOp mo_pushpop(DisasContext *s, MemOp ot)
{
    if (CODE64(s)) {
        return ot == MO_16 ? MO_16 : MO_64;
    } else {
        return ot;
    }
}

/* Select the size of the stack pointer.  */
static inline MemOp mo_stacksize(DisasContext *s)
{
    return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
}
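/*
 * Example (annotation): in 64-bit mode pushes default to 64 bits and only
 * an operand-size prefix (ot == MO_16) narrows them to 16; 32-bit pushes
 * do not exist there, which is exactly what mo_pushpop() encodes.
 */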

/* Select only size 64 else 32.  Used for SSE operand sizes.  */
static inline MemOp mo_64_32(MemOp ot)
{
#ifdef TARGET_X86_64
    return ot == MO_64 ? MO_64 : MO_32;
#else
    return MO_32;
#endif
}

/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
   byte vs word opcodes.  */
static inline MemOp mo_b_d(int b, MemOp ot)
{
    return b & 1 ? ot : MO_8;
}

/* Select size 8 if lsb of B is clear, else OT capped at 32.
   Used for decoding operand size of port opcodes.  */
static inline MemOp mo_b_d32(int b, MemOp ot)
{
    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
}

static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0)
{
    switch (ot) {
    case MO_8:
        if (!byte_reg_is_xH(s, reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}

static inline
void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg)
{
    if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
        tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
    } else {
        tcg_gen_mov_tl(t0, cpu_regs[reg]);
    }
}

static void gen_add_A0_im(DisasContext *s, int val)
{
    tcg_gen_addi_tl(s->A0, s->A0, val);
    if (!CODE64(s)) {
        tcg_gen_ext32u_tl(s->A0, s->A0);
    }
}

static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}

static inline
void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg)
{
    tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
{
    if (d == OR_TMP0) {
        gen_op_st_v(s, idx, s->T0, s->A0);
    } else {
        gen_op_mov_reg_v(s, idx, d, s->T0);
    }
}

static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
{
    tcg_gen_movi_tl(s->tmp0, pc);
    gen_op_jmp_v(s->tmp0);
}

/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(s->A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && ADDSEG(s)) {
            ovr_seg = def_seg;
        }
        if (ovr_seg < 0) {
            tcg_gen_ext32u_tl(s->A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(s->A0, a0);
        a0 = s->A0;
        if (ovr_seg < 0) {
            if (ADDSEG(s)) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(s->A0, a0, seg);
        } else if (CODE64(s)) {
            tcg_gen_ext32u_tl(s->A0, a0);
            tcg_gen_add_tl(s->A0, s->A0, seg);
        } else {
            tcg_gen_add_tl(s->A0, a0, seg);
            tcg_gen_ext32u_tl(s->A0, s->A0);
        }
    }
}
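/*
 * Worked example (annotation): for a 16-bit "mov (%bx), %ax" with no
 * segment override, aflag == MO_16: a0 is first zero-extended to 16 bits;
 * if ADDSEG(s) is set, the default DS base is then added and the sum
 * truncated to 32 bits, yielding the classic seg_base + (offset & 0xffff)
 * effective address.
 */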

static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}

static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)
{
    tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
    tcg_gen_shli_tl(s->T0, s->T0, ot);
}

static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        return src;
    }
}

static void gen_extu(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

static void gen_exts(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}

static inline
void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
}

static inline
void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
}

static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        tcg_abort();
    }
}

/*
 * Validate that access to [port, port + 1<<ot) is allowed.
 * Raise #GP, or VMM exit if not.
 */
static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port,
                         uint32_t svm_flags)
{
#ifdef CONFIG_USER_ONLY
    /*
     * We do not implement the ioperm(2) syscall, so the TSS check
     * will always fail.
     */
    gen_exception_gpf(s);
    return false;
#else
    if (PE(s) && (CPL(s) > IOPL(s) || VM86(s))) {
        gen_helper_check_io(cpu_env, port, tcg_constant_i32(1 << ot));
    }
    if (GUEST(s)) {
        target_ulong cur_eip = s->base.pc_next - s->cs_base;
        target_ulong next_eip = s->pc - s->cs_base;

        gen_update_cc_op(s);
        gen_jmp_im(s, cur_eip);
        if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
            svm_flags |= SVM_IOIO_REP_MASK;
        }
        svm_flags |= 1 << (SVM_IOIO_SIZE_SHIFT + ot);
        gen_helper_svm_check_io(cpu_env, port,
                                tcg_constant_i32(svm_flags),
                                tcg_constant_i32(next_eip - cur_eip));
    }
    return true;
#endif
}
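/*
 * Example (annotation): for "rep outsw" with the port in %dx while
 * translating SVM guest code, the call above passes SVM_IOIO_REP_MASK
 * plus a size bit of 1 << (SVM_IOIO_SIZE_SHIFT + MO_16), letting the
 * intercept helper report the exact width and rep-ness of the access.
 */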

static inline void gen_movs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_op_update1_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update2_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update3_cc(DisasContext *s, TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
{
    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
}

static void gen_op_update_neg_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
    tcg_gen_neg_tl(cpu_cc_src, s->T0);
    tcg_gen_movi_tl(s->cc_srcT, 0);
}

/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    zero = NULL;
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}

typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;
    bool use_reg2;
    bool no_setcond;
} CCPrepare;
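/*
 * Annotation: a CCPrepare is a recipe for a condition rather than an
 * eagerly computed value.  Consumers such as gen_setcc1() below evaluate
 * cond(reg & mask, use_reg2 ? reg2 : imm); mask == -1 means no masking is
 * needed, and no_setcond means reg already holds the result so no
 * setcond instruction is required.
 */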

/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
        tcg_gen_mov_tl(t0, s->cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}

/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}

/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}

/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}

/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
                             .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}
/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    MemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_extu(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_exts(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}
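/*
 * Worked example (annotation): "jne" encodes condition 5, so
 * b == (JCC_Z << 1) | 1, giving jcc_op == JCC_Z and inv == 1; the final
 * tcg_invert_cond() flips whatever condition the Z test prepared.  After
 * a cmp, "jle" instead takes the CC_OP_SUB fast path and becomes a single
 * signed TCG_COND_LE comparison of the saved operands.
 */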

static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}

static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s, s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

static inline void gen_stos(DisasContext *s, MemOp ot)
{
    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
#ifdef CONFIG_USER_ONLY
        /* user-mode cpu should not be in IOBPT mode */
        g_assert_not_reached();
#else
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
#endif /* CONFIG_USER_ONLY */
    }
}

static inline void gen_ins(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(s->T0, 0);
    gen_op_st_v(s, ot, s->T0, s->A0);
    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    gen_helper_in_func(ot, s->T0, s->tmp2_i32);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

static inline void gen_outs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);

    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
    gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

/* same method as Valgrind: we generate jumps to current or next
   instruction */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)
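/*
 * Annotation: each expansion above produces, per executed TB, roughly
 *
 *     if (ecx == 0) goto next_insn;      // gen_jz_ecx_string()
 *     <one iteration of the string op>
 *     ecx -= 1;
 *     if (repz_opt && ecx == 0) goto next_insn;
 *     goto cur_eip;                      // re-enter the rep instruction
 *
 * (GEN_REPZ2 additionally exits when ZF no longer matches the REPZ/REPNZ
 * prefix), i.e. one iteration at a time with a backward jump to the same
 * instruction -- the Valgrind-style scheme the comment above refers to.
 */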

static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0(cpu_env);
        break;
    case 1:
        gen_helper_fmul_ST0_FT0(cpu_env);
        break;
    case 2:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 3:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 4:
        gen_helper_fsub_ST0_FT0(cpu_env);
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0(cpu_env);
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0(cpu_env);
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0(cpu_env);
        break;
    }
}

/* NOTE the exception in "r" op ordering */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}

static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
    s->base.is_jmp = DISAS_NORETURN;
}

/* Generate #UD for the current instruction.  The assumption here is that
   the instruction is known, but it isn't allowed in the current cpu mode.  */
static void gen_illegal_opcode(DisasContext *s)
{
    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
}

/* Generate #GP for the current instruction. */
static void gen_exception_gpf(DisasContext *s)
{
    gen_exception(s, EXCP0D_GPF, s->pc_start - s->cs_base);
}

/* Check for cpl == 0; if not, raise #GP and return false. */
static bool check_cpl0(DisasContext *s)
{
    if (CPL(s) == 0) {
        return true;
    }
    gen_exception_gpf(s);
    return false;
}

/* If vm86, check for iopl == 3; if not, raise #GP and return false. */
static bool check_vm86_iopl(DisasContext *s)
{
    if (!VM86(s) || IOPL(s) == 3) {
        return true;
    }
    gen_exception_gpf(s);
    return false;
}

/* Check for iopl allowing access; if not, raise #GP and return false. */
static bool check_iopl(DisasContext *s)
{
    if (VM86(s) ? IOPL(s) == 3 : CPL(s) <= IOPL(s)) {
        return true;
    }
    gen_exception_gpf(s);
    return false;
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
{
    if (d != OR_TMP0) {
        if (s1->prefix & PREFIX_LOCK) {
            /* Lock prefix when destination is not memory.  */
            gen_illegal_opcode(s1);
            return;
        }
        gen_op_mov_v_reg(s1, ot, s1->T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
    }
    switch (op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
            tcg_gen_neg_tl(s1->T0, s1->T0);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_neg_tl(s1->T0, s1->T1);
            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
        } else {
            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}
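/*
 * Worked example (annotation): for "lock addl %eax, (%ebx)" gen_op() is
 * entered with op == OP_ADDL, d == OR_TMP0 and PREFIX_LOCK set, so the
 * read-modify-write is a single tcg_gen_atomic_add_fetch_tl(); the value
 * it returns feeds gen_op_update2_cc(), so the flags come out exactly as
 * on the non-atomic path.
 */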

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
{
    if (s1->prefix & PREFIX_LOCK) {
        if (d != OR_TMP0) {
            /* Lock prefix when destination is not memory */
            gen_illegal_opcode(s1);
            return;
        }
        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                    s1->mem_index, ot | MO_LE);
    } else {
        if (d != OR_TMP0) {
            gen_op_mov_v_reg(s1, ot, s1->T0, d);
        } else {
            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
        }
        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
        gen_op_st_rm_T0_A0(s1, ot, d);
    }

    gen_compute_eflags_c(s1, cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
}

static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
        oldop = s->tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}
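/*
 * Annotation: the movcond sequence above implements the x86 rule that a
 * shift by a zero count leaves the flags (and CC_OP) untouched -- e.g.
 * "shl %cl, %eax" with %cl == 0 selects the old cpu_cc_dst/cpu_cc_src
 * and the old CC_OP value, clobbering nothing.
 */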
1569 
1570 static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
1571                             int is_right, int is_arith)
1572 {
1573     target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1574 
1575     /* load */
1576     if (op1 == OR_TMP0) {
1577         gen_op_ld_v(s, ot, s->T0, s->A0);
1578     } else {
1579         gen_op_mov_v_reg(s, ot, s->T0, op1);
1580     }
1581 
1582     tcg_gen_andi_tl(s->T1, s->T1, mask);
1583     tcg_gen_subi_tl(s->tmp0, s->T1, 1);
1584 
1585     if (is_right) {
1586         if (is_arith) {
1587             gen_exts(ot, s->T0);
1588             tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
1589             tcg_gen_sar_tl(s->T0, s->T0, s->T1);
1590         } else {
1591             gen_extu(ot, s->T0);
1592             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1593             tcg_gen_shr_tl(s->T0, s->T0, s->T1);
1594         }
1595     } else {
1596         tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1597         tcg_gen_shl_tl(s->T0, s->T0, s->T1);
1598     }
1599 
1600     /* store */
1601     gen_op_st_rm_T0_A0(s, ot, op1);
1602 
1603     gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
1604 }
1605 
1606 static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1607                             int is_right, int is_arith)
1608 {
1609     int mask = (ot == MO_64 ? 0x3f : 0x1f);
1610 
1611     /* load */
1612     if (op1 == OR_TMP0)
1613         gen_op_ld_v(s, ot, s->T0, s->A0);
1614     else
1615         gen_op_mov_v_reg(s, ot, s->T0, op1);
1616 
1617     op2 &= mask;
1618     if (op2 != 0) {
1619         if (is_right) {
1620             if (is_arith) {
1621                 gen_exts(ot, s->T0);
1622                 tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
1623                 tcg_gen_sari_tl(s->T0, s->T0, op2);
1624             } else {
1625                 gen_extu(ot, s->T0);
1626                 tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
1627                 tcg_gen_shri_tl(s->T0, s->T0, op2);
1628             }
1629         } else {
1630             tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
1631             tcg_gen_shli_tl(s->T0, s->T0, op2);
1632         }
1633     }
1634 
1635     /* store */
1636     gen_op_st_rm_T0_A0(s, ot, op1);
1637 
1638     /* update eflags if non zero shift */
1639     if (op2 != 0) {
1640         tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
1641         tcg_gen_mov_tl(cpu_cc_dst, s->T0);
1642         set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1643     }
1644 }
1645 
1646 static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
1647 {
1648     target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1649     TCGv_i32 t0, t1;
1650 
1651     /* load */
1652     if (op1 == OR_TMP0) {
1653         gen_op_ld_v(s, ot, s->T0, s->A0);
1654     } else {
1655         gen_op_mov_v_reg(s, ot, s->T0, op1);
1656     }
1657 
1658     tcg_gen_andi_tl(s->T1, s->T1, mask);
1659 
1660     switch (ot) {
1661     case MO_8:
1662         /* Replicate the 8-bit input so that a 32-bit rotate works.  */
1663         tcg_gen_ext8u_tl(s->T0, s->T0);
1664         tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
1665         goto do_long;
1666     case MO_16:
1667         /* Replicate the 16-bit input so that a 32-bit rotate works.  */
1668         tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
1669         goto do_long;
1670     do_long:
1671 #ifdef TARGET_X86_64
1672     case MO_32:
1673         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1674         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
1675         if (is_right) {
1676             tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1677         } else {
1678             tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1679         }
1680         tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1681         break;
1682 #endif
1683     default:
1684         if (is_right) {
1685             tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
1686         } else {
1687             tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
1688         }
1689         break;
1690     }
1691 
1692     /* store */
1693     gen_op_st_rm_T0_A0(s, ot, op1);
1694 
1695     /* We'll need the flags computed into CC_SRC.  */
1696     gen_compute_eflags(s);
1697 
1698     /* The value that was "rotated out" is now present at the other end
1699        of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1700        since we've computed the flags into CC_SRC, these variables are
1701        currently dead.  */
1702     if (is_right) {
1703         tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1704         tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1705         tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1706     } else {
1707         tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1708         tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1709     }
1710     tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1711     tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
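    /* For a left rotate this yields CF = bit 0 of the result and
       OF = CF ^ MSB(result); for a right rotate, CF = MSB(result) and
       OF is the XOR of the two most-significant result bits, matching
       the architectural single-bit rotate definitions.  */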
1712 
    /* Now conditionally store the new CC_OP value.  If the shift count
       is 0, we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
       Otherwise reuse CC_OP_ADCOX, which has the C and O flags split out
       exactly as we computed above.  */
1717     t0 = tcg_const_i32(0);
1718     t1 = tcg_temp_new_i32();
1719     tcg_gen_trunc_tl_i32(t1, s->T1);
1720     tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
1721     tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
1722     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
1723                         s->tmp2_i32, s->tmp3_i32);
1724     tcg_temp_free_i32(t0);
1725     tcg_temp_free_i32(t1);
1726 
1727     /* The CC_OP value is no longer predictable.  */
1728     set_cc_op(s, CC_OP_DYNAMIC);
1729 }
1730 
1731 static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1732                           int is_right)
1733 {
1734     int mask = (ot == MO_64 ? 0x3f : 0x1f);
1735     int shift;
1736 
1737     /* load */
1738     if (op1 == OR_TMP0) {
1739         gen_op_ld_v(s, ot, s->T0, s->A0);
1740     } else {
1741         gen_op_mov_v_reg(s, ot, s->T0, op1);
1742     }
1743 
1744     op2 &= mask;
1745     if (op2 != 0) {
1746         switch (ot) {
1747 #ifdef TARGET_X86_64
1748         case MO_32:
1749             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1750             if (is_right) {
1751                 tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
1752             } else {
1753                 tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
1754             }
1755             tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1756             break;
1757 #endif
1758         default:
1759             if (is_right) {
1760                 tcg_gen_rotri_tl(s->T0, s->T0, op2);
1761             } else {
1762                 tcg_gen_rotli_tl(s->T0, s->T0, op2);
1763             }
1764             break;
1765         case MO_8:
1766             mask = 7;
1767             goto do_shifts;
1768         case MO_16:
1769             mask = 15;
1770         do_shifts:
1771             shift = op2 & mask;
1772             if (is_right) {
1773                 shift = mask + 1 - shift;
1774             }
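            /* Rotate as (T0 << shift) | (T0 >> (width - shift)); a
               right rotate by N was converted above into a left rotate
               by width - N.  */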
1775             gen_extu(ot, s->T0);
1776             tcg_gen_shli_tl(s->tmp0, s->T0, shift);
1777             tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
1778             tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
1779             break;
1780         }
1781     }
1782 
1783     /* store */
1784     gen_op_st_rm_T0_A0(s, ot, op1);
1785 
1786     if (op2 != 0) {
1787         /* Compute the flags into CC_SRC.  */
1788         gen_compute_eflags(s);
1789 
1790         /* The value that was "rotated out" is now present at the other end
1791            of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1792            since we've computed the flags into CC_SRC, these variables are
1793            currently dead.  */
1794         if (is_right) {
1795             tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1796             tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1797             tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1798         } else {
1799             tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1800             tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1801         }
1802         tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1803         tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1804         set_cc_op(s, CC_OP_ADCOX);
1805     }
1806 }
1807 
1808 /* XXX: add faster immediate = 1 case */
1809 static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
1810                            int is_right)
1811 {
1812     gen_compute_eflags(s);
1813     assert(s->cc_op == CC_OP_EFLAGS);
1814 
1815     /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }
1820 
1821     if (is_right) {
1822         switch (ot) {
1823         case MO_8:
1824             gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1825             break;
1826         case MO_16:
1827             gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1828             break;
1829         case MO_32:
1830             gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1831             break;
1832 #ifdef TARGET_X86_64
1833         case MO_64:
1834             gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1835             break;
1836 #endif
1837         default:
1838             tcg_abort();
1839         }
1840     } else {
1841         switch (ot) {
1842         case MO_8:
1843             gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1844             break;
1845         case MO_16:
1846             gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1847             break;
1848         case MO_32:
1849             gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1850             break;
1851 #ifdef TARGET_X86_64
1852         case MO_64:
1853             gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1854             break;
1855 #endif
1856         default:
1857             tcg_abort();
1858         }
1859     }
1860     /* store */
1861     gen_op_st_rm_T0_A0(s, ot, op1);
1862 }
1863 
1864 /* XXX: add faster immediate case */
1865 static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
1866                              bool is_right, TCGv count_in)
1867 {
1868     target_ulong mask = (ot == MO_64 ? 63 : 31);
1869     TCGv count;
1870 
1871     /* load */
1872     if (op1 == OR_TMP0) {
1873         gen_op_ld_v(s, ot, s->T0, s->A0);
1874     } else {
1875         gen_op_mov_v_reg(s, ot, s->T0, op1);
1876     }
1877 
1878     count = tcg_temp_new();
1879     tcg_gen_andi_tl(count, count_in, mask);
1880 
1881     switch (ot) {
1882     case MO_16:
1883         /* Note: we implement the Intel behaviour for shift count > 16.
1884            This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1885            portion by constructing it as a 32-bit value.  */
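        /* e.g. with A = 0x1234, B = 0x5678 and count = 20, the 48-bit
           value 0x123456781234 is shifted right by 20 and A receives
           0x4567.  */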
1886         if (is_right) {
1887             tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
1888             tcg_gen_mov_tl(s->T1, s->T0);
1889             tcg_gen_mov_tl(s->T0, s->tmp0);
1890         } else {
1891             tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
1892         }
        /*
         * If TARGET_X86_64 is defined, fall through into the MO_32 case;
         * otherwise fall through into the default case.
         */
1897     case MO_32:
1898 #ifdef TARGET_X86_64
1899         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1900         tcg_gen_subi_tl(s->tmp0, count, 1);
1901         if (is_right) {
1902             tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
1903             tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
1904             tcg_gen_shr_i64(s->T0, s->T0, count);
1905         } else {
1906             tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
1907             tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
1908             tcg_gen_shl_i64(s->T0, s->T0, count);
1909             tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
1910             tcg_gen_shri_i64(s->T0, s->T0, 32);
1911         }
1912         break;
1913 #endif
1914     default:
1915         tcg_gen_subi_tl(s->tmp0, count, 1);
1916         if (is_right) {
1917             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1918 
1919             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1920             tcg_gen_shr_tl(s->T0, s->T0, count);
1921             tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
1922         } else {
1923             tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1924             if (ot == MO_16) {
1925                 /* Only needed if count > 16, for Intel behaviour.  */
1926                 tcg_gen_subfi_tl(s->tmp4, 33, count);
1927                 tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
1928                 tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
1929             }
1930 
1931             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1932             tcg_gen_shl_tl(s->T0, s->T0, count);
1933             tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
1934         }
1935         tcg_gen_movi_tl(s->tmp4, 0);
1936         tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
1937                            s->tmp4, s->T1);
1938         tcg_gen_or_tl(s->T0, s->T0, s->T1);
1939         break;
1940     }
1941 
1942     /* store */
1943     gen_op_st_rm_T0_A0(s, ot, op1);
1944 
1945     gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
1946     tcg_temp_free(count);
1947 }
1948 
1949 static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
1950 {
    if (s != OR_TMP1) {
        gen_op_mov_v_reg(s1, ot, s1->T1, s);
    }
    switch (op) {
1954     case OP_ROL:
1955         gen_rot_rm_T1(s1, ot, d, 0);
1956         break;
1957     case OP_ROR:
1958         gen_rot_rm_T1(s1, ot, d, 1);
1959         break;
1960     case OP_SHL:
1961     case OP_SHL1:
1962         gen_shift_rm_T1(s1, ot, d, 0, 0);
1963         break;
1964     case OP_SHR:
1965         gen_shift_rm_T1(s1, ot, d, 1, 0);
1966         break;
1967     case OP_SAR:
1968         gen_shift_rm_T1(s1, ot, d, 1, 1);
1969         break;
1970     case OP_RCL:
1971         gen_rotc_rm_T1(s1, ot, d, 0);
1972         break;
1973     case OP_RCR:
1974         gen_rotc_rm_T1(s1, ot, d, 1);
1975         break;
1976     }
1977 }
1978 
1979 static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
1980 {
    switch (op) {
1982     case OP_ROL:
1983         gen_rot_rm_im(s1, ot, d, c, 0);
1984         break;
1985     case OP_ROR:
1986         gen_rot_rm_im(s1, ot, d, c, 1);
1987         break;
1988     case OP_SHL:
1989     case OP_SHL1:
1990         gen_shift_rm_im(s1, ot, d, c, 0, 0);
1991         break;
1992     case OP_SHR:
1993         gen_shift_rm_im(s1, ot, d, c, 1, 0);
1994         break;
1995     case OP_SAR:
1996         gen_shift_rm_im(s1, ot, d, c, 1, 1);
1997         break;
1998     default:
1999         /* currently not optimized */
2000         tcg_gen_movi_tl(s1->T1, c);
2001         gen_shift(s1, op, ot, d, OR_TMP1);
2002         break;
2003     }
2004 }
2005 
2006 #define X86_MAX_INSN_LENGTH 15
2007 
2008 static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
2009 {
2010     uint64_t pc = s->pc;
2011 
2012     /* This is a subsequent insn that crosses a page boundary.  */
2013     if (s->base.num_insns > 1 &&
2014         !is_same_page(&s->base, s->pc + num_bytes - 1)) {
2015         siglongjmp(s->jmpbuf, 2);
2016     }
2017 
2018     s->pc += num_bytes;
2019     if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
2020         /* If the instruction's 16th byte is on a different page than the 1st, a
2021          * page fault on the second page wins over the general protection fault
2022          * caused by the instruction being too long.
2023          * This can happen even if the operand is only one byte long!
2024          */
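        /* Probe the page holding the instruction's last byte, so that
           any #PF is raised before the #GP for the over-long insn.  */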
2025         if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
2026             volatile uint8_t unused =
2027                 cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
2028             (void) unused;
2029         }
2030         siglongjmp(s->jmpbuf, 1);
2031     }
2032 
2033     return pc;
2034 }
2035 
2036 static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
2037 {
2038     return translator_ldub(env, &s->base, advance_pc(env, s, 1));
2039 }
2040 
2041 static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
2042 {
2043     return translator_lduw(env, &s->base, advance_pc(env, s, 2));
2044 }
2045 
2046 static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
2047 {
2048     return translator_lduw(env, &s->base, advance_pc(env, s, 2));
2049 }
2050 
2051 static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
2052 {
2053     return translator_ldl(env, &s->base, advance_pc(env, s, 4));
2054 }
2055 
2056 #ifdef TARGET_X86_64
2057 static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
2058 {
2059     return translator_ldq(env, &s->base, advance_pc(env, s, 8));
2060 }
2061 #endif
2062 
2063 /* Decompose an address.  */
2064 
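/*
 * def_seg is the default segment (R_DS or R_SS); base and index are
 * CPU register numbers, with -1 meaning "none" and base == -2 marking
 * a RIP-relative address; scale is the log2 SIB scale factor; disp is
 * the decoded displacement.
 */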
2065 typedef struct AddressParts {
2066     int def_seg;
2067     int base;
2068     int index;
2069     int scale;
2070     target_long disp;
2071 } AddressParts;
2072 
2073 static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
2074                                     int modrm)
2075 {
2076     int def_seg, base, index, scale, mod, rm;
2077     target_long disp;
2078     bool havesib;
2079 
2080     def_seg = R_DS;
2081     index = -1;
2082     scale = 0;
2083     disp = 0;
2084 
2085     mod = (modrm >> 6) & 3;
2086     rm = modrm & 7;
2087     base = rm | REX_B(s);
2088 
2089     if (mod == 3) {
2090         /* Normally filtered out earlier, but including this path
2091            simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
2092         goto done;
2093     }
2094 
2095     switch (s->aflag) {
2096     case MO_64:
2097     case MO_32:
2098         havesib = 0;
2099         if (rm == 4) {
2100             int code = x86_ldub_code(env, s);
2101             scale = (code >> 6) & 3;
2102             index = ((code >> 3) & 7) | REX_X(s);
2103             if (index == 4) {
2104                 index = -1;  /* no index */
2105             }
2106             base = (code & 7) | REX_B(s);
2107             havesib = 1;
2108         }
2109 
2110         switch (mod) {
2111         case 0:
2112             if ((base & 7) == 5) {
2113                 base = -1;
2114                 disp = (int32_t)x86_ldl_code(env, s);
2115                 if (CODE64(s) && !havesib) {
2116                     base = -2;
2117                     disp += s->pc + s->rip_offset;
2118                 }
2119             }
2120             break;
2121         case 1:
2122             disp = (int8_t)x86_ldub_code(env, s);
2123             break;
2124         default:
2125         case 2:
2126             disp = (int32_t)x86_ldl_code(env, s);
2127             break;
2128         }
2129 
2130         /* For correct popl handling with esp.  */
2131         if (base == R_ESP && s->popl_esp_hack) {
2132             disp += s->popl_esp_hack;
2133         }
2134         if (base == R_EBP || base == R_ESP) {
2135             def_seg = R_SS;
2136         }
2137         break;
2138 
2139     case MO_16:
2140         if (mod == 0) {
2141             if (rm == 6) {
2142                 base = -1;
2143                 disp = x86_lduw_code(env, s);
2144                 break;
2145             }
2146         } else if (mod == 1) {
2147             disp = (int8_t)x86_ldub_code(env, s);
2148         } else {
2149             disp = (int16_t)x86_lduw_code(env, s);
2150         }
2151 
2152         switch (rm) {
2153         case 0:
2154             base = R_EBX;
2155             index = R_ESI;
2156             break;
2157         case 1:
2158             base = R_EBX;
2159             index = R_EDI;
2160             break;
2161         case 2:
2162             base = R_EBP;
2163             index = R_ESI;
2164             def_seg = R_SS;
2165             break;
2166         case 3:
2167             base = R_EBP;
2168             index = R_EDI;
2169             def_seg = R_SS;
2170             break;
2171         case 4:
2172             base = R_ESI;
2173             break;
2174         case 5:
2175             base = R_EDI;
2176             break;
2177         case 6:
2178             base = R_EBP;
2179             def_seg = R_SS;
2180             break;
2181         default:
2182         case 7:
2183             base = R_EBX;
2184             break;
2185         }
2186         break;
2187 
2188     default:
2189         tcg_abort();
2190     }
2191 
2192  done:
2193     return (AddressParts){ def_seg, base, index, scale, disp };
2194 }
2195 
2196 /* Compute the address, with a minimum number of TCG ops.  */
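/* e.g. "lea 0x10(%rax,%rbx,2)" becomes shli(A0, rbx, 1);
   add(A0, A0, rax); addi(A0, A0, 0x10) -- whereas a bare "(%rax)"
   returns cpu_regs[R_EAX] directly, emitting no TCG op at all.  */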
2197 static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2198 {
2199     TCGv ea = NULL;
2200 
2201     if (a.index >= 0) {
2202         if (a.scale == 0) {
2203             ea = cpu_regs[a.index];
2204         } else {
2205             tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2206             ea = s->A0;
2207         }
2208         if (a.base >= 0) {
2209             tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2210             ea = s->A0;
2211         }
2212     } else if (a.base >= 0) {
2213         ea = cpu_regs[a.base];
2214     }
2215     if (!ea) {
2216         tcg_gen_movi_tl(s->A0, a.disp);
2217         ea = s->A0;
2218     } else if (a.disp != 0) {
2219         tcg_gen_addi_tl(s->A0, ea, a.disp);
2220         ea = s->A0;
2221     }
2222 
2223     return ea;
2224 }
2225 
2226 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2227 {
2228     AddressParts a = gen_lea_modrm_0(env, s, modrm);
2229     TCGv ea = gen_lea_modrm_1(s, a);
2230     gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2231 }
2232 
2233 static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2234 {
2235     (void)gen_lea_modrm_0(env, s, modrm);
2236 }
2237 
2238 /* Used for BNDCL, BNDCU, BNDCN.  */
2239 static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2240                       TCGCond cond, TCGv_i64 bndv)
2241 {
2242     TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
2243 
2244     tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
2245     if (!CODE64(s)) {
2246         tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
2247     }
2248     tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
2249     tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
2250     gen_helper_bndck(cpu_env, s->tmp2_i32);
2251 }
2252 
2253 /* used for LEA and MOV AX, mem */
2254 static void gen_add_A0_ds_seg(DisasContext *s)
2255 {
2256     gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
2257 }
2258 
2259 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2260    OR_TMP0 */
2261 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2262                            MemOp ot, int reg, int is_store)
2263 {
2264     int mod, rm;
2265 
2266     mod = (modrm >> 6) & 3;
2267     rm = (modrm & 7) | REX_B(s);
    if (mod == 3) {
        if (is_store) {
            if (reg != OR_TMP0) {
                gen_op_mov_v_reg(s, ot, s->T0, reg);
            }
            gen_op_mov_reg_v(s, ot, rm, s->T0);
        } else {
            gen_op_mov_v_reg(s, ot, s->T0, rm);
            if (reg != OR_TMP0) {
                gen_op_mov_reg_v(s, ot, reg, s->T0);
            }
        }
    } else {
        gen_lea_modrm(env, s, modrm);
        if (is_store) {
            if (reg != OR_TMP0) {
                gen_op_mov_v_reg(s, ot, s->T0, reg);
            }
            gen_op_st_v(s, ot, s->T0, s->A0);
        } else {
            gen_op_ld_v(s, ot, s->T0, s->A0);
            if (reg != OR_TMP0) {
                gen_op_mov_reg_v(s, ot, reg, s->T0);
            }
        }
    }
2290 }
2291 
2292 static target_ulong insn_get_addr(CPUX86State *env, DisasContext *s, MemOp ot)
2293 {
2294     target_ulong ret;
2295 
2296     switch (ot) {
2297     case MO_8:
2298         ret = x86_ldub_code(env, s);
2299         break;
2300     case MO_16:
2301         ret = x86_lduw_code(env, s);
2302         break;
2303     case MO_32:
2304         ret = x86_ldl_code(env, s);
2305         break;
2306 #ifdef TARGET_X86_64
2307     case MO_64:
2308         ret = x86_ldq_code(env, s);
2309         break;
2310 #endif
2311     default:
2312         g_assert_not_reached();
2313     }
2314     return ret;
2315 }
2316 
2317 static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot)
2318 {
2319     uint32_t ret;
2320 
2321     switch (ot) {
2322     case MO_8:
2323         ret = x86_ldub_code(env, s);
2324         break;
2325     case MO_16:
2326         ret = x86_lduw_code(env, s);
2327         break;
2328     case MO_32:
2329 #ifdef TARGET_X86_64
2330     case MO_64:
2331 #endif
2332         ret = x86_ldl_code(env, s);
2333         break;
2334     default:
2335         tcg_abort();
2336     }
2337     return ret;
2338 }
2339 
2340 static inline int insn_const_size(MemOp ot)
2341 {
2342     if (ot <= MO_32) {
2343         return 1 << ot;
2344     } else {
2345         return 4;
2346     }
2347 }
2348 
2349 static void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2350 {
2351     target_ulong pc = s->cs_base + eip;
2352 
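    /* Direct block chaining patches the jump at the end of the current
       TB, so it is only used when translator_use_goto_tb() allows it
       (in particular, when the target lies on the same guest page).  */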
    if (translator_use_goto_tb(&s->base, pc)) {
2354         /* jump to same page: we can use a direct jump */
2355         tcg_gen_goto_tb(tb_num);
2356         gen_jmp_im(s, eip);
2357         tcg_gen_exit_tb(s->base.tb, tb_num);
2358         s->base.is_jmp = DISAS_NORETURN;
2359     } else {
2360         /* jump to another page */
2361         gen_jmp_im(s, eip);
2362         gen_jr(s, s->tmp0);
2363     }
2364 }
2365 
2366 static inline void gen_jcc(DisasContext *s, int b,
2367                            target_ulong val, target_ulong next_eip)
2368 {
2369     TCGLabel *l1, *l2;
2370 
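    /* With jmp_opt, both outcomes end the TB with a goto_tb: slot 0
       chains to the not-taken path, slot 1 to the taken target.  */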
2371     if (s->jmp_opt) {
2372         l1 = gen_new_label();
2373         gen_jcc1(s, b, l1);
2374 
2375         gen_goto_tb(s, 0, next_eip);
2376 
2377         gen_set_label(l1);
2378         gen_goto_tb(s, 1, val);
2379     } else {
2380         l1 = gen_new_label();
2381         l2 = gen_new_label();
2382         gen_jcc1(s, b, l1);
2383 
2384         gen_jmp_im(s, next_eip);
2385         tcg_gen_br(l2);
2386 
2387         gen_set_label(l1);
2388         gen_jmp_im(s, val);
2389         gen_set_label(l2);
2390         gen_eob(s);
2391     }
2392 }
2393 
2394 static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b,
2395                         int modrm, int reg)
2396 {
2397     CCPrepare cc;
2398 
2399     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2400 
2401     cc = gen_prepare_cc(s, b, s->T1);
2402     if (cc.mask != -1) {
2403         TCGv t0 = tcg_temp_new();
2404         tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2405         cc.reg = t0;
2406     }
2407     if (!cc.use_reg2) {
2408         cc.reg2 = tcg_const_tl(cc.imm);
2409     }
2410 
2411     tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
2412                        s->T0, cpu_regs[reg]);
2413     gen_op_mov_reg_v(s, ot, reg, s->T0);
2414 
2415     if (cc.mask != -1) {
2416         tcg_temp_free(cc.reg);
2417     }
2418     if (!cc.use_reg2) {
2419         tcg_temp_free(cc.reg2);
2420     }
2421 }
2422 
2423 static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg)
2424 {
2425     tcg_gen_ld32u_tl(s->T0, cpu_env,
2426                      offsetof(CPUX86State,segs[seg_reg].selector));
2427 }
2428 
2429 static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg)
2430 {
2431     tcg_gen_ext16u_tl(s->T0, s->T0);
2432     tcg_gen_st32_tl(s->T0, cpu_env,
2433                     offsetof(CPUX86State,segs[seg_reg].selector));
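    /* Outside protected mode the segment base is simply selector << 4. */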
2434     tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
2435 }
2436 
/* Move T0 to seg_reg and determine whether the CPU state may change as
   a result.  Never call this function with seg_reg == R_CS. */
2439 static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
2440 {
2441     if (PE(s) && !VM86(s)) {
2442         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
2443         gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
2444         /* abort translation because the addseg value may change or
2445            because ss32 may change. For R_SS, translation must always
2446            stop as a special handling must be done to disable hardware
2447            interrupts for the next instruction */
2448         if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) {
2449             s->base.is_jmp = DISAS_TOO_MANY;
2450         }
2451     } else {
2452         gen_op_movl_seg_T0_vm(s, seg_reg);
2453         if (seg_reg == R_SS) {
2454             s->base.is_jmp = DISAS_TOO_MANY;
2455         }
2456     }
2457 }
2458 
2459 static void gen_svm_check_intercept(DisasContext *s, uint32_t type)
2460 {
2461     /* no SVM activated; fast case */
2462     if (likely(!GUEST(s))) {
2463         return;
2464     }
2465     gen_helper_svm_check_intercept(cpu_env, tcg_constant_i32(type));
2466 }
2467 
2468 static inline void gen_stack_update(DisasContext *s, int addend)
2469 {
2470     gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
2471 }
2472 
2473 /* Generate a push. It depends on ss32, addseg and dflag.  */
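/* The store is performed before ESP is written back, so a push that
   faults leaves ESP unmodified.  */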
2474 static void gen_push_v(DisasContext *s, TCGv val)
2475 {
2476     MemOp d_ot = mo_pushpop(s, s->dflag);
2477     MemOp a_ot = mo_stacksize(s);
2478     int size = 1 << d_ot;
2479     TCGv new_esp = s->A0;
2480 
2481     tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
2482 
2483     if (!CODE64(s)) {
2484         if (ADDSEG(s)) {
2485             new_esp = s->tmp4;
2486             tcg_gen_mov_tl(new_esp, s->A0);
2487         }
2488         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2489     }
2490 
2491     gen_op_st_v(s, d_ot, val, s->A0);
2492     gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
2493 }
2494 
/* A two-step pop is necessary for precise exceptions: ESP is updated
   only after the load has succeeded. */
2496 static MemOp gen_pop_T0(DisasContext *s)
2497 {
2498     MemOp d_ot = mo_pushpop(s, s->dflag);
2499 
2500     gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2501     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2502 
2503     return d_ot;
2504 }
2505 
2506 static inline void gen_pop_update(DisasContext *s, MemOp ot)
2507 {
2508     gen_stack_update(s, 1 << ot);
2509 }
2510 
2511 static inline void gen_stack_A0(DisasContext *s)
2512 {
2513     gen_lea_v_seg(s, SS32(s) ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2514 }
2515 
2516 static void gen_pusha(DisasContext *s)
2517 {
2518     MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2519     MemOp d_ot = s->dflag;
2520     int size = 1 << d_ot;
2521     int i;
2522 
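    /* cpu_regs[7 - i] stores EAX at the highest address and EDI at the
       lowest, the PUSHA order; ESP itself is only updated afterwards,
       so the value stored for ESP is the original one, as specified.  */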
2523     for (i = 0; i < 8; i++) {
2524         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2525         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2526         gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2527     }
2528 
2529     gen_stack_update(s, -8 * size);
2530 }
2531 
2532 static void gen_popa(DisasContext *s)
2533 {
2534     MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2535     MemOp d_ot = s->dflag;
2536     int size = 1 << d_ot;
2537     int i;
2538 
2539     for (i = 0; i < 8; i++) {
2540         /* ESP is not reloaded */
2541         if (7 - i == R_ESP) {
2542             continue;
2543         }
2544         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2545         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2546         gen_op_ld_v(s, d_ot, s->T0, s->A0);
2547         gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2548     }
2549 
2550     gen_stack_update(s, 8 * size);
2551 }
2552 
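/* ENTER: push EBP, copy LEVEL-1 saved frame pointers from the old
   frame and push the new frame pointer (FrameTemp) when LEVEL > 0,
   then set EBP = FrameTemp and ESP = FrameTemp - esp_addend -
   size * level.  */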
2553 static void gen_enter(DisasContext *s, int esp_addend, int level)
2554 {
2555     MemOp d_ot = mo_pushpop(s, s->dflag);
2556     MemOp a_ot = CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
2557     int size = 1 << d_ot;
2558 
2559     /* Push BP; compute FrameTemp into T1.  */
2560     tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
2561     gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
2562     gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
2563 
2564     level &= 31;
2565     if (level != 0) {
2566         int i;
2567 
2568         /* Copy level-1 pointers from the previous frame.  */
2569         for (i = 1; i < level; ++i) {
2570             tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
2571             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2572             gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
2573 
2574             tcg_gen_subi_tl(s->A0, s->T1, size * i);
2575             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2576             gen_op_st_v(s, d_ot, s->tmp0, s->A0);
2577         }
2578 
2579         /* Push the current FrameTemp as the last level.  */
2580         tcg_gen_subi_tl(s->A0, s->T1, size * level);
2581         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2582         gen_op_st_v(s, d_ot, s->T1, s->A0);
2583     }
2584 
2585     /* Copy the FrameTemp value to EBP.  */
2586     gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
2587 
2588     /* Compute the final value of ESP.  */
2589     tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
2590     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2591 }
2592 
2593 static void gen_leave(DisasContext *s)
2594 {
2595     MemOp d_ot = mo_pushpop(s, s->dflag);
2596     MemOp a_ot = mo_stacksize(s);
2597 
2598     gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2599     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2600 
2601     tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
2602 
2603     gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
2604     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2605 }
2606 
2607 /* Similarly, except that the assumption here is that we don't decode
2608    the instruction at all -- either a missing opcode, an unimplemented
2609    feature, or just a bogus instruction stream.  */
2610 static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2611 {
2612     gen_illegal_opcode(s);
2613 
2614     if (qemu_loglevel_mask(LOG_UNIMP)) {
2615         FILE *logfile = qemu_log_trylock();
2616         if (logfile) {
2617             target_ulong pc = s->pc_start, end = s->pc;
2618 
2619             fprintf(logfile, "ILLOPC: " TARGET_FMT_lx ":", pc);
2620             for (; pc < end; ++pc) {
2621                 fprintf(logfile, " %02x", cpu_ldub_code(env, pc));
2622             }
2623             fprintf(logfile, "\n");
2624             qemu_log_unlock(logfile);
2625         }
2626     }
2627 }
2628 
2629 /* an interrupt is different from an exception because of the
2630    privilege checks */
2631 static void gen_interrupt(DisasContext *s, int intno,
2632                           target_ulong cur_eip, target_ulong next_eip)
2633 {
2634     gen_update_cc_op(s);
2635     gen_jmp_im(s, cur_eip);
2636     gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2637                                tcg_const_i32(next_eip - cur_eip));
2638     s->base.is_jmp = DISAS_NORETURN;
2639 }
2640 
2641 static void gen_set_hflag(DisasContext *s, uint32_t mask)
2642 {
2643     if ((s->flags & mask) == 0) {
2644         TCGv_i32 t = tcg_temp_new_i32();
2645         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2646         tcg_gen_ori_i32(t, t, mask);
2647         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2648         tcg_temp_free_i32(t);
2649         s->flags |= mask;
2650     }
2651 }
2652 
2653 static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2654 {
2655     if (s->flags & mask) {
2656         TCGv_i32 t = tcg_temp_new_i32();
2657         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2658         tcg_gen_andi_i32(t, t, ~mask);
2659         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2660         tcg_temp_free_i32(t);
2661         s->flags &= ~mask;
2662     }
2663 }
2664 
2665 /* Clear BND registers during legacy branches.  */
2666 static void gen_bnd_jmp(DisasContext *s)
2667 {
2668     /* Clear the registers only if BND prefix is missing, MPX is enabled,
2669        and if the BNDREGs are known to be in use (non-zero) already.
2670        The helper itself will check BNDPRESERVE at runtime.  */
2671     if ((s->prefix & PREFIX_REPNZ) == 0
2672         && (s->flags & HF_MPX_EN_MASK) != 0
2673         && (s->flags & HF_MPX_IU_MASK) != 0) {
2674         gen_helper_bnd_jmp(cpu_env);
2675     }
2676 }
2677 
2678 /* Generate an end of block. Trace exception is also generated if needed.
2679    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2680    If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2681    S->TF.  This is used by the syscall/sysret insns.  */
2682 static void
2683 do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2684 {
2685     gen_update_cc_op(s);
2686 
2687     /* If several instructions disable interrupts, only the first does it.  */
2688     if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2689         gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2690     } else {
2691         gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2692     }
2693 
2694     if (s->base.tb->flags & HF_RF_MASK) {
2695         gen_helper_reset_rf(cpu_env);
2696     }
2697     if (recheck_tf) {
2698         gen_helper_rechecking_single_step(cpu_env);
2699         tcg_gen_exit_tb(NULL, 0);
2700     } else if (s->flags & HF_TF_MASK) {
2701         gen_helper_single_step(cpu_env);
2702     } else if (jr) {
2703         tcg_gen_lookup_and_goto_ptr();
2704     } else {
2705         tcg_gen_exit_tb(NULL, 0);
2706     }
2707     s->base.is_jmp = DISAS_NORETURN;
2708 }
2709 
2710 static inline void
2711 gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2712 {
2713     do_gen_eob_worker(s, inhibit, recheck_tf, false);
2714 }
2715 
2716 /* End of block.
2717    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2718 static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2719 {
2720     gen_eob_worker(s, inhibit, false);
2721 }
2722 
2723 /* End of block, resetting the inhibit irq flag.  */
2724 static void gen_eob(DisasContext *s)
2725 {
2726     gen_eob_worker(s, false, false);
2727 }
2728 
2729 /* Jump to register */
2730 static void gen_jr(DisasContext *s, TCGv dest)
2731 {
2732     do_gen_eob_worker(s, false, false, true);
2733 }
2734 
/* generate a jump to eip.  No segment change may happen beforehand,
   because a direct jump (block chaining) to the next block may occur */
2737 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2738 {
2739     gen_update_cc_op(s);
2740     set_cc_op(s, CC_OP_DYNAMIC);
2741     if (s->jmp_opt) {
2742         gen_goto_tb(s, tb_num, eip);
2743     } else {
2744         gen_jmp_im(s, eip);
2745         gen_eob(s);
2746     }
2747 }
2748 
2749 static void gen_jmp(DisasContext *s, target_ulong eip)
2750 {
2751     gen_jmp_tb(s, eip, 0);
2752 }
2753 
2754 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2755 {
2756     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ);
2757     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
2758 }
2759 
2760 static inline void gen_stq_env_A0(DisasContext *s, int offset)
2761 {
2762     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
2763     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ);
2764 }
2765 
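/* 128-bit loads and stores are split into two 64-bit accesses; when
   ALIGN is requested, only the first access carries the 16-byte
   alignment check.  */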
2766 static inline void gen_ldo_env_A0(DisasContext *s, int offset, bool align)
2767 {
2768     int mem_index = s->mem_index;
2769     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index,
2770                         MO_LEUQ | (align ? MO_ALIGN_16 : 0));
2771     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2772     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2773     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
2774     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2775 }
2776 
2777 static inline void gen_sto_env_A0(DisasContext *s, int offset, bool align)
2778 {
2779     int mem_index = s->mem_index;
2780     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2781     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index,
2782                         MO_LEUQ | (align ? MO_ALIGN_16 : 0));
2783     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2784     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2785     tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
2786 }
2787 
2788 static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
2789 {
2790     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2791     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2792     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2793     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2794 }
2795 
2796 static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
2797 {
2798     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
2799     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2800 }
2801 
2802 static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
2803 {
2804     tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
2805     tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
2806 }
2807 
2808 static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
2809 {
2810     tcg_gen_movi_i64(s->tmp1_i64, 0);
2811     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2812 }
2813 
2814 #define ZMM_OFFSET(reg) offsetof(CPUX86State, xmm_regs[reg])
2815 
2816 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2817 typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2818 typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2819 typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2820 typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2821 typedef void (*SSEFunc_0_eppp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2822                                TCGv_ptr reg_c);
2823 typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2824                                TCGv_i32 val);
2825 typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2826 typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2827                                TCGv val);
2828 
2829 #define SSE_OPF_CMP       (1 << 1) /* does not write for first operand */
2830 #define SSE_OPF_SPECIAL   (1 << 3) /* magic */
2831 #define SSE_OPF_3DNOW     (1 << 4) /* 3DNow! instruction */
2832 #define SSE_OPF_MMX       (1 << 5) /* MMX/integer/AVX2 instruction */
2833 #define SSE_OPF_SCALAR    (1 << 6) /* Has SSE scalar variants */
2834 #define SSE_OPF_SHUF      (1 << 9) /* pshufx/shufpx */
2835 
2836 #define OP(op, flags, a, b, c, d)       \
2837     {flags, {{.op = a}, {.op = b}, {.op = c}, {.op = d} } }
2838 
2839 #define MMX_OP(x) OP(op1, SSE_OPF_MMX, \
2840         gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm, NULL, NULL)
2841 
2842 #define SSE_FOP(name) OP(op1, SSE_OPF_SCALAR, \
2843         gen_helper_##name##ps##_xmm, gen_helper_##name##pd##_xmm, \
2844         gen_helper_##name##ss, gen_helper_##name##sd)
2845 #define SSE_OP(sname, dname, op, flags) OP(op, flags, \
2846         gen_helper_##sname##_xmm, gen_helper_##dname##_xmm, NULL, NULL)
2847 
2848 typedef union SSEFuncs {
2849     SSEFunc_0_epp op1;
2850     SSEFunc_0_ppi op1i;
2851     SSEFunc_0_eppt op1t;
2852 } SSEFuncs;
2853 
2854 struct SSEOpHelper_table1 {
2855     int flags;
2856     SSEFuncs fn[4];
2857 };
2858 
2859 #define SSE_3DNOW { SSE_OPF_3DNOW }
2860 #define SSE_SPECIAL { SSE_OPF_SPECIAL }
2861 
2862 static const struct SSEOpHelper_table1 sse_op_table1[256] = {
2863     /* 3DNow! extensions */
2864     [0x0e] = SSE_SPECIAL, /* femms */
2865     [0x0f] = SSE_3DNOW, /* pf... (sse_op_table5) */
2866     /* pure SSE operations */
2867     [0x10] = SSE_SPECIAL, /* movups, movupd, movss, movsd */
2868     [0x11] = SSE_SPECIAL, /* movups, movupd, movss, movsd */
2869     [0x12] = SSE_SPECIAL, /* movlps, movlpd, movsldup, movddup */
2870     [0x13] = SSE_SPECIAL, /* movlps, movlpd */
2871     [0x14] = SSE_OP(punpckldq, punpcklqdq, op1, 0), /* unpcklps, unpcklpd */
2872     [0x15] = SSE_OP(punpckhdq, punpckhqdq, op1, 0), /* unpckhps, unpckhpd */
2873     [0x16] = SSE_SPECIAL, /* movhps, movhpd, movshdup */
2874     [0x17] = SSE_SPECIAL, /* movhps, movhpd */
2875 
2876     [0x28] = SSE_SPECIAL, /* movaps, movapd */
2877     [0x29] = SSE_SPECIAL, /* movaps, movapd */
2878     [0x2a] = SSE_SPECIAL, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2879     [0x2b] = SSE_SPECIAL, /* movntps, movntpd, movntss, movntsd */
2880     [0x2c] = SSE_SPECIAL, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2881     [0x2d] = SSE_SPECIAL, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2882     [0x2e] = OP(op1, SSE_OPF_CMP | SSE_OPF_SCALAR,
2883             gen_helper_ucomiss, gen_helper_ucomisd, NULL, NULL),
2884     [0x2f] = OP(op1, SSE_OPF_CMP | SSE_OPF_SCALAR,
2885             gen_helper_comiss, gen_helper_comisd, NULL, NULL),
2886     [0x50] = SSE_SPECIAL, /* movmskps, movmskpd */
2887     [0x51] = OP(op1, SSE_OPF_SCALAR,
2888                 gen_helper_sqrtps_xmm, gen_helper_sqrtpd_xmm,
2889                 gen_helper_sqrtss, gen_helper_sqrtsd),
2890     [0x52] = OP(op1, SSE_OPF_SCALAR,
2891                 gen_helper_rsqrtps_xmm, NULL, gen_helper_rsqrtss, NULL),
2892     [0x53] = OP(op1, SSE_OPF_SCALAR,
2893                 gen_helper_rcpps_xmm, NULL, gen_helper_rcpss, NULL),
2894     [0x54] = SSE_OP(pand, pand, op1, 0), /* andps, andpd */
2895     [0x55] = SSE_OP(pandn, pandn, op1, 0), /* andnps, andnpd */
2896     [0x56] = SSE_OP(por, por, op1, 0), /* orps, orpd */
2897     [0x57] = SSE_OP(pxor, pxor, op1, 0), /* xorps, xorpd */
2898     [0x58] = SSE_FOP(add),
2899     [0x59] = SSE_FOP(mul),
2900     [0x5a] = OP(op1, SSE_OPF_SCALAR,
2901                 gen_helper_cvtps2pd_xmm, gen_helper_cvtpd2ps_xmm,
2902                 gen_helper_cvtss2sd, gen_helper_cvtsd2ss),
2903     [0x5b] = OP(op1, 0,
2904                 gen_helper_cvtdq2ps_xmm, gen_helper_cvtps2dq_xmm,
2905                 gen_helper_cvttps2dq_xmm, NULL),
2906     [0x5c] = SSE_FOP(sub),
2907     [0x5d] = SSE_FOP(min),
2908     [0x5e] = SSE_FOP(div),
2909     [0x5f] = SSE_FOP(max),
2910 
2911     [0xc2] = SSE_FOP(cmpeq), /* sse_op_table4 */
2912     [0xc6] = SSE_OP(shufps, shufpd, op1i, SSE_OPF_SHUF),
2913 
2914     /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2915     [0x38] = SSE_SPECIAL,
2916     [0x3a] = SSE_SPECIAL,
2917 
2918     /* MMX ops and their SSE extensions */
2919     [0x60] = MMX_OP(punpcklbw),
2920     [0x61] = MMX_OP(punpcklwd),
2921     [0x62] = MMX_OP(punpckldq),
2922     [0x63] = MMX_OP(packsswb),
2923     [0x64] = MMX_OP(pcmpgtb),
2924     [0x65] = MMX_OP(pcmpgtw),
2925     [0x66] = MMX_OP(pcmpgtl),
2926     [0x67] = MMX_OP(packuswb),
2927     [0x68] = MMX_OP(punpckhbw),
2928     [0x69] = MMX_OP(punpckhwd),
2929     [0x6a] = MMX_OP(punpckhdq),
2930     [0x6b] = MMX_OP(packssdw),
2931     [0x6c] = OP(op1, SSE_OPF_MMX,
2932                 NULL, gen_helper_punpcklqdq_xmm, NULL, NULL),
2933     [0x6d] = OP(op1, SSE_OPF_MMX,
2934                 NULL, gen_helper_punpckhqdq_xmm, NULL, NULL),
2935     [0x6e] = SSE_SPECIAL, /* movd mm, ea */
    [0x6f] = SSE_SPECIAL, /* movq, movdqa, , movdqu */
2937     [0x70] = OP(op1i, SSE_OPF_SHUF | SSE_OPF_MMX,
2938             gen_helper_pshufw_mmx, gen_helper_pshufd_xmm,
2939             gen_helper_pshufhw_xmm, gen_helper_pshuflw_xmm),
2940     [0x71] = SSE_SPECIAL, /* shiftw */
2941     [0x72] = SSE_SPECIAL, /* shiftd */
2942     [0x73] = SSE_SPECIAL, /* shiftq */
2943     [0x74] = MMX_OP(pcmpeqb),
2944     [0x75] = MMX_OP(pcmpeqw),
2945     [0x76] = MMX_OP(pcmpeql),
2946     [0x77] = SSE_SPECIAL, /* emms */
2947     [0x78] = SSE_SPECIAL, /* extrq_i, insertq_i (sse4a) */
2948     [0x79] = OP(op1, 0,
2949             NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r),
2950     [0x7c] = OP(op1, 0,
2951                 NULL, gen_helper_haddpd_xmm, NULL, gen_helper_haddps_xmm),
2952     [0x7d] = OP(op1, 0,
2953                 NULL, gen_helper_hsubpd_xmm, NULL, gen_helper_hsubps_xmm),
2954     [0x7e] = SSE_SPECIAL, /* movd, movd, , movq */
2955     [0x7f] = SSE_SPECIAL, /* movq, movdqa, movdqu */
2956     [0xc4] = SSE_SPECIAL, /* pinsrw */
2957     [0xc5] = SSE_SPECIAL, /* pextrw */
2958     [0xd0] = OP(op1, 0,
2959                 NULL, gen_helper_addsubpd_xmm, NULL, gen_helper_addsubps_xmm),
2960     [0xd1] = MMX_OP(psrlw),
2961     [0xd2] = MMX_OP(psrld),
2962     [0xd3] = MMX_OP(psrlq),
2963     [0xd4] = MMX_OP(paddq),
2964     [0xd5] = MMX_OP(pmullw),
2965     [0xd6] = SSE_SPECIAL,
2966     [0xd7] = SSE_SPECIAL, /* pmovmskb */
2967     [0xd8] = MMX_OP(psubusb),
2968     [0xd9] = MMX_OP(psubusw),
2969     [0xda] = MMX_OP(pminub),
2970     [0xdb] = MMX_OP(pand),
2971     [0xdc] = MMX_OP(paddusb),
2972     [0xdd] = MMX_OP(paddusw),
2973     [0xde] = MMX_OP(pmaxub),
2974     [0xdf] = MMX_OP(pandn),
2975     [0xe0] = MMX_OP(pavgb),
2976     [0xe1] = MMX_OP(psraw),
2977     [0xe2] = MMX_OP(psrad),
2978     [0xe3] = MMX_OP(pavgw),
2979     [0xe4] = MMX_OP(pmulhuw),
2980     [0xe5] = MMX_OP(pmulhw),
2981     [0xe6] = OP(op1, 0,
2982             NULL, gen_helper_cvttpd2dq_xmm,
2983             gen_helper_cvtdq2pd_xmm, gen_helper_cvtpd2dq_xmm),
    [0xe7] = SSE_SPECIAL,  /* movntq, movntdq */
2985     [0xe8] = MMX_OP(psubsb),
2986     [0xe9] = MMX_OP(psubsw),
2987     [0xea] = MMX_OP(pminsw),
2988     [0xeb] = MMX_OP(por),
2989     [0xec] = MMX_OP(paddsb),
2990     [0xed] = MMX_OP(paddsw),
2991     [0xee] = MMX_OP(pmaxsw),
2992     [0xef] = MMX_OP(pxor),
2993     [0xf0] = SSE_SPECIAL, /* lddqu */
2994     [0xf1] = MMX_OP(psllw),
2995     [0xf2] = MMX_OP(pslld),
2996     [0xf3] = MMX_OP(psllq),
2997     [0xf4] = MMX_OP(pmuludq),
2998     [0xf5] = MMX_OP(pmaddwd),
2999     [0xf6] = MMX_OP(psadbw),
3000     [0xf7] = OP(op1t, SSE_OPF_MMX,
3001                 gen_helper_maskmov_mmx, gen_helper_maskmov_xmm, NULL, NULL),
3002     [0xf8] = MMX_OP(psubb),
3003     [0xf9] = MMX_OP(psubw),
3004     [0xfa] = MMX_OP(psubl),
3005     [0xfb] = MMX_OP(psubq),
3006     [0xfc] = MMX_OP(paddb),
3007     [0xfd] = MMX_OP(paddw),
3008     [0xfe] = MMX_OP(paddl),
3009 };
3010 #undef MMX_OP
3011 #undef OP
3012 #undef SSE_FOP
3013 #undef SSE_OP
3014 #undef SSE_SPECIAL
3015 
3016 #define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
3017 
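/* Shift-group table for opcodes 0x71 (word), 0x72 (dword) and 0x73
   (qword): rows of eight are offset by the modrm reg field (/2 = srl,
   /4 = sra, /6 = sll, plus the XMM-only byte shifts /3 and /7); the
   second index selects the MMX (0) or XMM (1) variant.  */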
3018 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
3019     [0 + 2] = MMX_OP2(psrlw),
3020     [0 + 4] = MMX_OP2(psraw),
3021     [0 + 6] = MMX_OP2(psllw),
3022     [8 + 2] = MMX_OP2(psrld),
3023     [8 + 4] = MMX_OP2(psrad),
3024     [8 + 6] = MMX_OP2(pslld),
3025     [16 + 2] = MMX_OP2(psrlq),
3026     [16 + 3] = { NULL, gen_helper_psrldq_xmm },
3027     [16 + 6] = MMX_OP2(psllq),
3028     [16 + 7] = { NULL, gen_helper_pslldq_xmm },
3029 };
3030 
3031 static const SSEFunc_0_epi sse_op_table3ai[] = {
3032     gen_helper_cvtsi2ss,
3033     gen_helper_cvtsi2sd
3034 };
3035 
3036 #ifdef TARGET_X86_64
3037 static const SSEFunc_0_epl sse_op_table3aq[] = {
3038     gen_helper_cvtsq2ss,
3039     gen_helper_cvtsq2sd
3040 };
3041 #endif
3042 
3043 static const SSEFunc_i_ep sse_op_table3bi[] = {
3044     gen_helper_cvttss2si,
3045     gen_helper_cvtss2si,
3046     gen_helper_cvttsd2si,
3047     gen_helper_cvtsd2si
3048 };
3049 
3050 #ifdef TARGET_X86_64
3051 static const SSEFunc_l_ep sse_op_table3bq[] = {
3052     gen_helper_cvttss2sq,
3053     gen_helper_cvtss2sq,
3054     gen_helper_cvttsd2sq,
3055     gen_helper_cvtsd2sq
3056 };
3057 #endif
3058 
3059 #define SSE_CMP(x) { \
3060     gen_helper_ ## x ## ps ## _xmm, gen_helper_ ## x ## pd ## _xmm, \
3061     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd}
3062 static const SSEFunc_0_epp sse_op_table4[8][4] = {
3063     SSE_CMP(cmpeq),
3064     SSE_CMP(cmplt),
3065     SSE_CMP(cmple),
3066     SSE_CMP(cmpunord),
3067     SSE_CMP(cmpneq),
3068     SSE_CMP(cmpnlt),
3069     SSE_CMP(cmpnle),
3070     SSE_CMP(cmpord),
3071 };
3072 #undef SSE_CMP
3073 
3074 static const SSEFunc_0_epp sse_op_table5[256] = {
3075     [0x0c] = gen_helper_pi2fw,
3076     [0x0d] = gen_helper_pi2fd,
3077     [0x1c] = gen_helper_pf2iw,
3078     [0x1d] = gen_helper_pf2id,
3079     [0x8a] = gen_helper_pfnacc,
3080     [0x8e] = gen_helper_pfpnacc,
3081     [0x90] = gen_helper_pfcmpge,
3082     [0x94] = gen_helper_pfmin,
3083     [0x96] = gen_helper_pfrcp,
3084     [0x97] = gen_helper_pfrsqrt,
3085     [0x9a] = gen_helper_pfsub,
3086     [0x9e] = gen_helper_pfadd,
3087     [0xa0] = gen_helper_pfcmpgt,
3088     [0xa4] = gen_helper_pfmax,
3089     [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
3090     [0xa7] = gen_helper_movq, /* pfrsqit1 */
3091     [0xaa] = gen_helper_pfsubr,
3092     [0xae] = gen_helper_pfacc,
3093     [0xb0] = gen_helper_pfcmpeq,
3094     [0xb4] = gen_helper_pfmul,
3095     [0xb6] = gen_helper_movq, /* pfrcpit2 */
3096     [0xb7] = gen_helper_pmulhrw_mmx,
3097     [0xbb] = gen_helper_pswapd,
3098     [0xbf] = gen_helper_pavgb_mmx,
3099 };
3100 
3101 struct SSEOpHelper_table6 {
3102     SSEFuncs fn[2];
3103     uint32_t ext_mask;
3104     int flags;
3105 };
3106 
3107 struct SSEOpHelper_table7 {
3108     union {
3109         SSEFunc_0_eppi op1;
3110     } fn[2];
3111     uint32_t ext_mask;
3112     int flags;
3113 };
3114 
3115 #define gen_helper_special_xmm NULL
3116 
3117 #define OP(name, op, flags, ext, mmx_name) \
3118     {{{.op = mmx_name}, {.op = gen_helper_ ## name ## _xmm} }, \
3119         CPUID_EXT_ ## ext, flags}
3120 #define BINARY_OP_MMX(name, ext) \
3121     OP(name, op1, SSE_OPF_MMX, ext, gen_helper_ ## name ## _mmx)
3122 #define BINARY_OP(name, ext, flags) \
3123     OP(name, op1, flags, ext, NULL)
3124 #define UNARY_OP_MMX(name, ext) \
3125     OP(name, op1, SSE_OPF_MMX, ext, gen_helper_ ## name ## _mmx)
3126 #define UNARY_OP(name, ext, flags) \
3127     OP(name, op1, flags, ext, NULL)
3128 #define BLENDV_OP(name, ext, flags) OP(name, op1, 0, ext, NULL)
3129 #define CMP_OP(name, ext) OP(name, op1, SSE_OPF_CMP, ext, NULL)
3130 #define SPECIAL_OP(ext) OP(special, op1, SSE_OPF_SPECIAL, ext, NULL)
3131 
3132 /* prefix [66] 0f 38 */
3133 static const struct SSEOpHelper_table6 sse_op_table6[256] = {
3134     [0x00] = BINARY_OP_MMX(pshufb, SSSE3),
3135     [0x01] = BINARY_OP_MMX(phaddw, SSSE3),
3136     [0x02] = BINARY_OP_MMX(phaddd, SSSE3),
3137     [0x03] = BINARY_OP_MMX(phaddsw, SSSE3),
3138     [0x04] = BINARY_OP_MMX(pmaddubsw, SSSE3),
3139     [0x05] = BINARY_OP_MMX(phsubw, SSSE3),
3140     [0x06] = BINARY_OP_MMX(phsubd, SSSE3),
3141     [0x07] = BINARY_OP_MMX(phsubsw, SSSE3),
3142     [0x08] = BINARY_OP_MMX(psignb, SSSE3),
3143     [0x09] = BINARY_OP_MMX(psignw, SSSE3),
3144     [0x0a] = BINARY_OP_MMX(psignd, SSSE3),
3145     [0x0b] = BINARY_OP_MMX(pmulhrsw, SSSE3),
3146     [0x10] = BLENDV_OP(pblendvb, SSE41, SSE_OPF_MMX),
3147     [0x14] = BLENDV_OP(blendvps, SSE41, 0),
3148     [0x15] = BLENDV_OP(blendvpd, SSE41, 0),
3149     [0x17] = CMP_OP(ptest, SSE41),
3150     [0x1c] = UNARY_OP_MMX(pabsb, SSSE3),
3151     [0x1d] = UNARY_OP_MMX(pabsw, SSSE3),
3152     [0x1e] = UNARY_OP_MMX(pabsd, SSSE3),
3153     [0x20] = UNARY_OP(pmovsxbw, SSE41, SSE_OPF_MMX),
3154     [0x21] = UNARY_OP(pmovsxbd, SSE41, SSE_OPF_MMX),
3155     [0x22] = UNARY_OP(pmovsxbq, SSE41, SSE_OPF_MMX),
3156     [0x23] = UNARY_OP(pmovsxwd, SSE41, SSE_OPF_MMX),
3157     [0x24] = UNARY_OP(pmovsxwq, SSE41, SSE_OPF_MMX),
3158     [0x25] = UNARY_OP(pmovsxdq, SSE41, SSE_OPF_MMX),
3159     [0x28] = BINARY_OP(pmuldq, SSE41, SSE_OPF_MMX),
3160     [0x29] = BINARY_OP(pcmpeqq, SSE41, SSE_OPF_MMX),
3161     [0x2a] = SPECIAL_OP(SSE41), /* movntdqa */
3162     [0x2b] = BINARY_OP(packusdw, SSE41, SSE_OPF_MMX),
3163     [0x30] = UNARY_OP(pmovzxbw, SSE41, SSE_OPF_MMX),
3164     [0x31] = UNARY_OP(pmovzxbd, SSE41, SSE_OPF_MMX),
3165     [0x32] = UNARY_OP(pmovzxbq, SSE41, SSE_OPF_MMX),
3166     [0x33] = UNARY_OP(pmovzxwd, SSE41, SSE_OPF_MMX),
3167     [0x34] = UNARY_OP(pmovzxwq, SSE41, SSE_OPF_MMX),
3168     [0x35] = UNARY_OP(pmovzxdq, SSE41, SSE_OPF_MMX),
3169     [0x37] = BINARY_OP(pcmpgtq, SSE41, SSE_OPF_MMX),
3170     [0x38] = BINARY_OP(pminsb, SSE41, SSE_OPF_MMX),
3171     [0x39] = BINARY_OP(pminsd, SSE41, SSE_OPF_MMX),
3172     [0x3a] = BINARY_OP(pminuw, SSE41, SSE_OPF_MMX),
3173     [0x3b] = BINARY_OP(pminud, SSE41, SSE_OPF_MMX),
3174     [0x3c] = BINARY_OP(pmaxsb, SSE41, SSE_OPF_MMX),
3175     [0x3d] = BINARY_OP(pmaxsd, SSE41, SSE_OPF_MMX),
3176     [0x3e] = BINARY_OP(pmaxuw, SSE41, SSE_OPF_MMX),
3177     [0x3f] = BINARY_OP(pmaxud, SSE41, SSE_OPF_MMX),
3178     [0x40] = BINARY_OP(pmulld, SSE41, SSE_OPF_MMX),
3179     [0x41] = UNARY_OP(phminposuw, SSE41, 0),
3180     [0xdb] = UNARY_OP(aesimc, AES, 0),
3181     [0xdc] = BINARY_OP(aesenc, AES, 0),
3182     [0xdd] = BINARY_OP(aesenclast, AES, 0),
3183     [0xde] = BINARY_OP(aesdec, AES, 0),
3184     [0xdf] = BINARY_OP(aesdeclast, AES, 0),
3185 };
3186 
3187 /* prefix [66] 0f 3a */
3188 static const struct SSEOpHelper_table7 sse_op_table7[256] = {
3189     [0x08] = UNARY_OP(roundps, SSE41, 0),
3190     [0x09] = UNARY_OP(roundpd, SSE41, 0),
3191     [0x0a] = UNARY_OP(roundss, SSE41, SSE_OPF_SCALAR),
3192     [0x0b] = UNARY_OP(roundsd, SSE41, SSE_OPF_SCALAR),
3193     [0x0c] = BINARY_OP(blendps, SSE41, 0),
3194     [0x0d] = BINARY_OP(blendpd, SSE41, 0),
3195     [0x0e] = BINARY_OP(pblendw, SSE41, SSE_OPF_MMX),
3196     [0x0f] = BINARY_OP_MMX(palignr, SSSE3),
3197     [0x14] = SPECIAL_OP(SSE41), /* pextrb */
3198     [0x15] = SPECIAL_OP(SSE41), /* pextrw */
3199     [0x16] = SPECIAL_OP(SSE41), /* pextrd/pextrq */
3200     [0x17] = SPECIAL_OP(SSE41), /* extractps */
3201     [0x20] = SPECIAL_OP(SSE41), /* pinsrb */
3202     [0x21] = SPECIAL_OP(SSE41), /* insertps */
3203     [0x22] = SPECIAL_OP(SSE41), /* pinsrd/pinsrq */
3204     [0x40] = BINARY_OP(dpps, SSE41, 0),
3205     [0x41] = BINARY_OP(dppd, SSE41, 0),
3206     [0x42] = BINARY_OP(mpsadbw, SSE41, SSE_OPF_MMX),
3207     [0x44] = BINARY_OP(pclmulqdq, PCLMULQDQ, 0),
3208     [0x60] = CMP_OP(pcmpestrm, SSE42),
3209     [0x61] = CMP_OP(pcmpestri, SSE42),
3210     [0x62] = CMP_OP(pcmpistrm, SSE42),
3211     [0x63] = CMP_OP(pcmpistri, SSE42),
3212     [0xdf] = UNARY_OP(aeskeygenassist, AES, 0),
3213 };
3214 
3215 #undef OP
3216 #undef BINARY_OP_MMX
3217 #undef BINARY_OP
3218 #undef UNARY_OP_MMX
3219 #undef UNARY_OP
3220 #undef BLENDV_OP
3221 #undef SPECIAL_OP
3222 
3223 /* VEX prefix not allowed */
#define CHECK_NO_VEX(s) do { \
    if (s->prefix & PREFIX_VEX) { \
        goto illegal_op; \
    } \
} while (0)
3228 
3229 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3230                     target_ulong pc_start)
3231 {
3232     int b1, op1_offset, op2_offset, is_xmm, val;
3233     int modrm, mod, rm, reg;
3234     int sse_op_flags;
3235     SSEFuncs sse_op_fn;
3236     const struct SSEOpHelper_table6 *op6;
3237     const struct SSEOpHelper_table7 *op7;
3238     MemOp ot;
3239 
3240     b &= 0xff;
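    /* b1 selects the mandatory-prefix column of sse_op_table1:
       0 = none, 1 = 0x66, 2 = 0xf3 (REPZ), 3 = 0xf2 (REPNZ).  */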
    if (s->prefix & PREFIX_DATA) {
        b1 = 1;
    } else if (s->prefix & PREFIX_REPZ) {
        b1 = 2;
    } else if (s->prefix & PREFIX_REPNZ) {
        b1 = 3;
    } else {
        b1 = 0;
    }
3249     sse_op_flags = sse_op_table1[b].flags;
3250     sse_op_fn = sse_op_table1[b].fn[b1];
3251     if ((sse_op_flags & (SSE_OPF_SPECIAL | SSE_OPF_3DNOW)) == 0
3252             && !sse_op_fn.op1) {
3253         goto unknown_op;
3254     }
    if ((b >= 0x10 && b <= 0x5f) || b == 0xc2 || b == 0xc6) {
3256         is_xmm = 1;
3257     } else {
3258         if (b1 == 0) {
3259             /* MMX case */
3260             is_xmm = 0;
3261         } else {
3262             is_xmm = 1;
3263         }
3264     }
3265     if (sse_op_flags & SSE_OPF_3DNOW) {
3266         if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3267             goto illegal_op;
3268         }
3269     }
3270     /* simple MMX/SSE operation */
3271     if (s->flags & HF_TS_MASK) {
3272         gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3273         return;
3274     }
3275     if (s->flags & HF_EM_MASK) {
3276     illegal_op:
3277         gen_illegal_opcode(s);
3278         return;
3279     }
3280     if (is_xmm
3281         && !(s->flags & HF_OSFXSR_MASK)
3282         && (b != 0x38 && b != 0x3a)) {
3283         goto unknown_op;
3284     }
3285     if (b == 0x0e) {
3286         if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3287             /* If we were fully decoding this we might use illegal_op.  */
3288             goto unknown_op;
3289         }
3290         /* femms */
3291         gen_helper_emms(cpu_env);
3292         return;
3293     }
3294     if (b == 0x77) {
3295         /* emms */
3296         gen_helper_emms(cpu_env);
3297         return;
3298     }
3299     /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3300        the static cpu state) */
3301     if (!is_xmm) {
3302         gen_helper_enter_mmx(cpu_env);
3303     }
3304 
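    /* Decode ModRM.  REX.R extends the reg field only for XMM operands;
       there are just eight MMX registers.  */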
3305     modrm = x86_ldub_code(env, s);
3306     reg = ((modrm >> 3) & 7);
3307     if (is_xmm) {
3308         reg |= REX_R(s);
3309     }
3310     mod = (modrm >> 6) & 3;
3311     if (sse_op_flags & SSE_OPF_SPECIAL) {
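        /*
         * Fold the mandatory prefix into bits 8..9 of b, so that each
         * case below matches a (prefix, opcode) pair, e.g. 0x1e7 is
         * 66 0f e7 (movntdq).
         */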
3312         b |= (b1 << 8);
        switch (b) {
3314         case 0x0e7: /* movntq */
3315             CHECK_NO_VEX(s);
3316             if (mod == 3) {
3317                 goto illegal_op;
3318             }
3319             gen_lea_modrm(env, s, modrm);
3320             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3321             break;
3322         case 0x1e7: /* movntdq */
3323         case 0x02b: /* movntps */
3324         case 0x12b: /* movntpd */
            if (mod == 3) {
                goto illegal_op;
            }
3327             gen_lea_modrm(env, s, modrm);
3328             gen_sto_env_A0(s, ZMM_OFFSET(reg), true);
3329             break;
3330         case 0x3f0: /* lddqu */
            if (mod == 3) {
                goto illegal_op;
            }
3333             gen_lea_modrm(env, s, modrm);
3334             gen_ldo_env_A0(s, ZMM_OFFSET(reg), false);
3335             break;
3336         case 0x22b: /* movntss */
3337         case 0x32b: /* movntsd */
            if (mod == 3) {
                goto illegal_op;
            }
3340             gen_lea_modrm(env, s, modrm);
3341             if (b1 & 1) {
3342                 gen_stq_env_A0(s, offsetof(CPUX86State,
3343                                            xmm_regs[reg].ZMM_Q(0)));
3344             } else {
3345                 tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3346                     xmm_regs[reg].ZMM_L(0)));
3347                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3348             }
3349             break;
3350         case 0x6e: /* movd mm, ea */
3351             CHECK_NO_VEX(s);
3352 #ifdef TARGET_X86_64
3353             if (s->dflag == MO_64) {
3354                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3355                 tcg_gen_st_tl(s->T0, cpu_env,
3356                               offsetof(CPUX86State, fpregs[reg].mmx));
3357             } else
3358 #endif
3359             {
3360                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3361                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3362                                  offsetof(CPUX86State,fpregs[reg].mmx));
3363                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3364                 gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3365             }
3366             break;
3367         case 0x16e: /* movd xmm, ea */
3368 #ifdef TARGET_X86_64
3369             if (s->dflag == MO_64) {
3370                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3371                 tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(reg));
3372                 gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3373             } else
3374 #endif
3375             {
3376                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3377                 tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(reg));
3378                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3379                 gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3380             }
3381             break;
3382         case 0x6f: /* movq mm, ea */
3383             CHECK_NO_VEX(s);
3384             if (mod != 3) {
3385                 gen_lea_modrm(env, s, modrm);
3386                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3387             } else {
3388                 rm = (modrm & 7);
3389                 tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3390                                offsetof(CPUX86State,fpregs[rm].mmx));
3391                 tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3392                                offsetof(CPUX86State,fpregs[reg].mmx));
3393             }
3394             break;
3395         case 0x010: /* movups */
3396         case 0x110: /* movupd */
3397         case 0x028: /* movaps */
3398         case 0x128: /* movapd */
3399         case 0x16f: /* movdqa xmm, ea */
3400         case 0x26f: /* movdqu xmm, ea */
3401             if (mod != 3) {
3402                 gen_lea_modrm(env, s, modrm);
3403                 gen_ldo_env_A0(s, ZMM_OFFSET(reg),
3404                                /* movaps, movapd, movdqa */
3405                                b == 0x028 || b == 0x128 || b == 0x16f);
3406             } else {
3407                 rm = (modrm & 7) | REX_B(s);
3408                 gen_op_movo(s, ZMM_OFFSET(reg), ZMM_OFFSET(rm));
3409             }
3410             break;
3411         case 0x210: /* movss xmm, ea */
3412             if (mod != 3) {
3413                 gen_lea_modrm(env, s, modrm);
3414                 gen_op_ld_v(s, MO_32, s->T0, s->A0);
3415                 tcg_gen_st32_tl(s->T0, cpu_env,
3416                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3417                 tcg_gen_movi_tl(s->T0, 0);
3418                 tcg_gen_st32_tl(s->T0, cpu_env,
3419                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3420                 tcg_gen_st32_tl(s->T0, cpu_env,
3421                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3422                 tcg_gen_st32_tl(s->T0, cpu_env,
3423                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3424             } else {
3425                 rm = (modrm & 7) | REX_B(s);
3426                 tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
3427                                offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)));
3428                 tcg_gen_st_i32(s->tmp2_i32, cpu_env,
3429                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3430             }
3431             break;
3432         case 0x310: /* movsd xmm, ea */
3433             if (mod != 3) {
3434                 gen_lea_modrm(env, s, modrm);
3435                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3436                                            xmm_regs[reg].ZMM_Q(0)));
3437                 tcg_gen_movi_tl(s->T0, 0);
3438                 tcg_gen_st32_tl(s->T0, cpu_env,
3439                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3440                 tcg_gen_st32_tl(s->T0, cpu_env,
3441                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3442             } else {
3443                 rm = (modrm & 7) | REX_B(s);
3444                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3445                             offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)));
3446             }
3447             break;
3448         case 0x012: /* movlps */
3449         case 0x112: /* movlpd */
3450             if (mod != 3) {
3451                 gen_lea_modrm(env, s, modrm);
3452                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3453                                            xmm_regs[reg].ZMM_Q(0)));
3454             } else {
3455                 /* movhlps */
3456                 rm = (modrm & 7) | REX_B(s);
3457                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3458                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3459             }
3460             break;
3461         case 0x212: /* movsldup */
3462             if (mod != 3) {
3463                 gen_lea_modrm(env, s, modrm);
3464                 gen_ldo_env_A0(s, ZMM_OFFSET(reg), true);
3465             } else {
3466                 rm = (modrm & 7) | REX_B(s);
3467                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3468                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3469                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3470                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3471             }
3472             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3473                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3474             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3475                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3476             break;
3477         case 0x312: /* movddup */
3478             if (mod != 3) {
3479                 gen_lea_modrm(env, s, modrm);
3480                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3481                                            xmm_regs[reg].ZMM_Q(0)));
3482             } else {
3483                 rm = (modrm & 7) | REX_B(s);
3484                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3485                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3486             }
3487             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3488                         offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3489             break;
3490         case 0x016: /* movhps */
3491         case 0x116: /* movhpd */
3492             if (mod != 3) {
3493                 gen_lea_modrm(env, s, modrm);
3494                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3495                                            xmm_regs[reg].ZMM_Q(1)));
3496             } else {
3497                 /* movlhps */
3498                 rm = (modrm & 7) | REX_B(s);
3499                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3500                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3501             }
3502             break;
3503         case 0x216: /* movshdup */
3504             if (mod != 3) {
3505                 gen_lea_modrm(env, s, modrm);
3506                 gen_ldo_env_A0(s, ZMM_OFFSET(reg), true);
3507             } else {
3508                 rm = (modrm & 7) | REX_B(s);
3509                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3510                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3511                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3512                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3513             }
3514             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3515                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3516             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3517                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3518             break;
        case 0x178: /* extrq imm (sse4a) */
        case 0x378: /* insertq imm (sse4a) */
3521             CHECK_NO_VEX(s);
3522             {
3523                 int bit_index, field_length;
3524 
                if (b1 == 1 && reg != 0) {
                    goto illegal_op;
                }
                field_length = x86_ldub_code(env, s) & 0x3F;
                bit_index = x86_ldub_code(env, s) & 0x3F;
                tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(reg));
                if (b1 == 1) {
                    gen_helper_extrq_i(cpu_env, s->ptr0,
                                       tcg_const_i32(bit_index),
                                       tcg_const_i32(field_length));
                } else {
3535                     if (mod != 3) {
3536                         gen_lea_modrm(env, s, modrm);
3537                         op2_offset = offsetof(CPUX86State, xmm_t0);
3538                         gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
3539                     } else {
3540                         rm = (modrm & 7) | REX_B(s);
3541                         op2_offset = ZMM_OFFSET(rm);
3542                     }
3543                     tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3544                     gen_helper_insertq_i(cpu_env, s->ptr0, s->ptr1,
3545                                          tcg_const_i32(bit_index),
3546                                          tcg_const_i32(field_length));
3547                 }
3548             }
3549             break;
3550         case 0x7e: /* movd ea, mm */
3551             CHECK_NO_VEX(s);
3552 #ifdef TARGET_X86_64
3553             if (s->dflag == MO_64) {
3554                 tcg_gen_ld_i64(s->T0, cpu_env,
3555                                offsetof(CPUX86State,fpregs[reg].mmx));
3556                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3557             } else
3558 #endif
3559             {
3560                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3561                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3562                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3563             }
3564             break;
3565         case 0x17e: /* movd ea, xmm */
3566 #ifdef TARGET_X86_64
3567             if (s->dflag == MO_64) {
3568                 tcg_gen_ld_i64(s->T0, cpu_env,
3569                                offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3570                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3571             } else
3572 #endif
3573             {
3574                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3575                                  offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3576                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3577             }
3578             break;
3579         case 0x27e: /* movq xmm, ea */
3580             if (mod != 3) {
3581                 gen_lea_modrm(env, s, modrm);
3582                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3583                                            xmm_regs[reg].ZMM_Q(0)));
3584             } else {
3585                 rm = (modrm & 7) | REX_B(s);
3586                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3587                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3588             }
3589             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3590             break;
3591         case 0x7f: /* movq ea, mm */
3592             CHECK_NO_VEX(s);
3593             if (mod != 3) {
3594                 gen_lea_modrm(env, s, modrm);
3595                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3596             } else {
3597                 rm = (modrm & 7);
3598                 gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
3599                             offsetof(CPUX86State,fpregs[reg].mmx));
3600             }
3601             break;
3602         case 0x011: /* movups */
3603         case 0x111: /* movupd */
3604         case 0x029: /* movaps */
3605         case 0x129: /* movapd */
3606         case 0x17f: /* movdqa ea, xmm */
3607         case 0x27f: /* movdqu ea, xmm */
3608             if (mod != 3) {
3609                 gen_lea_modrm(env, s, modrm);
3610                 gen_sto_env_A0(s, ZMM_OFFSET(reg),
3611                                /* movaps, movapd, movdqa */
3612                                b == 0x029 || b == 0x129 || b == 0x17f);
3613             } else {
3614                 rm = (modrm & 7) | REX_B(s);
3615                 gen_op_movo(s, ZMM_OFFSET(rm), ZMM_OFFSET(reg));
3616             }
3617             break;
3618         case 0x211: /* movss ea, xmm */
3619             if (mod != 3) {
3620                 gen_lea_modrm(env, s, modrm);
3621                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3622                                  offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3623                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3624             } else {
3625                 rm = (modrm & 7) | REX_B(s);
3626                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
3627                             offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3628             }
3629             break;
3630         case 0x311: /* movsd ea, xmm */
3631             if (mod != 3) {
3632                 gen_lea_modrm(env, s, modrm);
3633                 gen_stq_env_A0(s, offsetof(CPUX86State,
3634                                            xmm_regs[reg].ZMM_Q(0)));
3635             } else {
3636                 rm = (modrm & 7) | REX_B(s);
3637                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3638                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3639             }
3640             break;
3641         case 0x013: /* movlps */
3642         case 0x113: /* movlpd */
3643             if (mod != 3) {
3644                 gen_lea_modrm(env, s, modrm);
3645                 gen_stq_env_A0(s, offsetof(CPUX86State,
3646                                            xmm_regs[reg].ZMM_Q(0)));
3647             } else {
3648                 goto illegal_op;
3649             }
3650             break;
3651         case 0x017: /* movhps */
3652         case 0x117: /* movhpd */
3653             if (mod != 3) {
3654                 gen_lea_modrm(env, s, modrm);
3655                 gen_stq_env_A0(s, offsetof(CPUX86State,
3656                                            xmm_regs[reg].ZMM_Q(1)));
3657             } else {
3658                 goto illegal_op;
3659             }
3660             break;
3661         case 0x71: /* shift mm, im */
3662         case 0x72:
3663         case 0x73:
3664         case 0x171: /* shift xmm, im */
3665         case 0x172:
3666         case 0x173:
3667             val = x86_ldub_code(env, s);
3668             if (is_xmm) {
3669                 tcg_gen_movi_tl(s->T0, val);
3670                 tcg_gen_st32_tl(s->T0, cpu_env,
3671                                 offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3672                 tcg_gen_movi_tl(s->T0, 0);
3673                 tcg_gen_st32_tl(s->T0, cpu_env,
3674                                 offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
3675                 op1_offset = offsetof(CPUX86State,xmm_t0);
3676             } else {
3677                 CHECK_NO_VEX(s);
3678                 tcg_gen_movi_tl(s->T0, val);
3679                 tcg_gen_st32_tl(s->T0, cpu_env,
3680                                 offsetof(CPUX86State, mmx_t0.MMX_L(0)));
3681                 tcg_gen_movi_tl(s->T0, 0);
3682                 tcg_gen_st32_tl(s->T0, cpu_env,
3683                                 offsetof(CPUX86State, mmx_t0.MMX_L(1)));
3684                 op1_offset = offsetof(CPUX86State,mmx_t0);
3685             }
3686             assert(b1 < 2);
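            /*
             * Index sse_op_table2 by opcode row ((b - 1) & 3 maps
             * 0x71/0x72/0x73 to rows 0/1/2) and the ModRM reg field;
             * column b1 selects the MMX (0) or XMM (1) variant.
             */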
            SSEFunc_0_epp fn = sse_op_table2[((b - 1) & 3) * 8 +
                                             ((modrm >> 3) & 7)][b1];
3689             if (!fn) {
3690                 goto unknown_op;
3691             }
3692             if (is_xmm) {
3693                 rm = (modrm & 7) | REX_B(s);
3694                 op2_offset = ZMM_OFFSET(rm);
3695             } else {
3696                 rm = (modrm & 7);
3697                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3698             }
3699             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3700             tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
3701             fn(cpu_env, s->ptr0, s->ptr1);
3702             break;
3703         case 0x050: /* movmskps */
3704             rm = (modrm & 7) | REX_B(s);
3705             tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(rm));
3706             gen_helper_movmskps_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3707             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3708             break;
3709         case 0x150: /* movmskpd */
3710             rm = (modrm & 7) | REX_B(s);
3711             tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(rm));
3712             gen_helper_movmskpd_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3713             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3714             break;
3715         case 0x02a: /* cvtpi2ps */
3716         case 0x12a: /* cvtpi2pd */
3717             CHECK_NO_VEX(s);
3718             gen_helper_enter_mmx(cpu_env);
3719             if (mod != 3) {
3720                 gen_lea_modrm(env, s, modrm);
3721                 op2_offset = offsetof(CPUX86State,mmx_t0);
3722                 gen_ldq_env_A0(s, op2_offset);
3723             } else {
3724                 rm = (modrm & 7);
3725                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3726             }
3727             op1_offset = ZMM_OFFSET(reg);
3728             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3729             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
            switch (b >> 8) {
3731             case 0x0:
3732                 gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
3733                 break;
3734             default:
3735             case 0x1:
3736                 gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
3737                 break;
3738             }
3739             break;
3740         case 0x22a: /* cvtsi2ss */
3741         case 0x32a: /* cvtsi2sd */
3742             ot = mo_64_32(s->dflag);
3743             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3744             op1_offset = ZMM_OFFSET(reg);
3745             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3746             if (ot == MO_32) {
3747                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3748                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3749                 sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
3750             } else {
3751 #ifdef TARGET_X86_64
3752                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3753                 sse_fn_epl(cpu_env, s->ptr0, s->T0);
3754 #else
3755                 goto illegal_op;
3756 #endif
3757             }
3758             break;
3759         case 0x02c: /* cvttps2pi */
3760         case 0x12c: /* cvttpd2pi */
3761         case 0x02d: /* cvtps2pi */
3762         case 0x12d: /* cvtpd2pi */
3763             CHECK_NO_VEX(s);
3764             gen_helper_enter_mmx(cpu_env);
3765             if (mod != 3) {
3766                 gen_lea_modrm(env, s, modrm);
3767                 op2_offset = offsetof(CPUX86State,xmm_t0);
3768                 /* FIXME: should be 64-bit access if b1 == 0.  */
3769                 gen_ldo_env_A0(s, op2_offset, !!b1);
3770             } else {
3771                 rm = (modrm & 7) | REX_B(s);
3772                 op2_offset = ZMM_OFFSET(rm);
3773             }
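            /* The destination is an MMX register, so mask off any REX.R
               extension that was folded into reg above.  */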
3774             op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3775             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3776             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
            switch (b) {
3778             case 0x02c:
3779                 gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
3780                 break;
3781             case 0x12c:
3782                 gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
3783                 break;
3784             case 0x02d:
3785                 gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
3786                 break;
3787             case 0x12d:
3788                 gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
3789                 break;
3790             }
3791             break;
3792         case 0x22c: /* cvttss2si */
3793         case 0x32c: /* cvttsd2si */
3794         case 0x22d: /* cvtss2si */
3795         case 0x32d: /* cvtsd2si */
3796             ot = mo_64_32(s->dflag);
3797             if (mod != 3) {
3798                 gen_lea_modrm(env, s, modrm);
3799                 if ((b >> 8) & 1) {
3800                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3801                 } else {
3802                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
3803                     tcg_gen_st32_tl(s->T0, cpu_env,
3804                                     offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3805                 }
3806                 op2_offset = offsetof(CPUX86State,xmm_t0);
3807             } else {
3808                 rm = (modrm & 7) | REX_B(s);
3809                 op2_offset = ZMM_OFFSET(rm);
3810             }
3811             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3812             if (ot == MO_32) {
3813                 SSEFunc_i_ep sse_fn_i_ep =
3814                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3815                 sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
3816                 tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
3817             } else {
3818 #ifdef TARGET_X86_64
3819                 SSEFunc_l_ep sse_fn_l_ep =
3820                     sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3821                 sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
3822 #else
3823                 goto illegal_op;
3824 #endif
3825             }
3826             gen_op_mov_reg_v(s, ot, reg, s->T0);
3827             break;
3828         case 0xc4: /* pinsrw */
3829         case 0x1c4:
3830             s->rip_offset = 1;
3831             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3832             val = x86_ldub_code(env, s);
3833             if (b1) {
3834                 val &= 7;
3835                 tcg_gen_st16_tl(s->T0, cpu_env,
3836                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3837             } else {
3838                 CHECK_NO_VEX(s);
3839                 val &= 3;
3840                 tcg_gen_st16_tl(s->T0, cpu_env,
3841                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3842             }
3843             break;
3844         case 0xc5: /* pextrw */
3845         case 0x1c5:
            if (mod != 3) {
                goto illegal_op;
            }
3848             ot = mo_64_32(s->dflag);
3849             val = x86_ldub_code(env, s);
3850             if (b1) {
3851                 val &= 7;
3852                 rm = (modrm & 7) | REX_B(s);
3853                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3854                                  offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3855             } else {
3856                 val &= 3;
3857                 rm = (modrm & 7);
3858                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3859                                 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3860             }
3861             reg = ((modrm >> 3) & 7) | REX_R(s);
3862             gen_op_mov_reg_v(s, ot, reg, s->T0);
3863             break;
3864         case 0x1d6: /* movq ea, xmm */
3865             if (mod != 3) {
3866                 gen_lea_modrm(env, s, modrm);
3867                 gen_stq_env_A0(s, offsetof(CPUX86State,
3868                                            xmm_regs[reg].ZMM_Q(0)));
3869             } else {
3870                 rm = (modrm & 7) | REX_B(s);
3871                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3872                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3873                 gen_op_movq_env_0(s,
3874                                   offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
3875             }
3876             break;
3877         case 0x2d6: /* movq2dq */
3878             CHECK_NO_VEX(s);
3879             gen_helper_enter_mmx(cpu_env);
3880             rm = (modrm & 7);
3881             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3882                         offsetof(CPUX86State,fpregs[rm].mmx));
3883             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3884             break;
3885         case 0x3d6: /* movdq2q */
3886             CHECK_NO_VEX(s);
3887             gen_helper_enter_mmx(cpu_env);
3888             rm = (modrm & 7) | REX_B(s);
3889             gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
3890                         offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3891             break;
3892         case 0xd7: /* pmovmskb */
3893         case 0x1d7:
            if (mod != 3) {
                goto illegal_op;
            }
3896             if (b1) {
3897                 rm = (modrm & 7) | REX_B(s);
3898                 tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(rm));
3899                 gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3900             } else {
3901                 CHECK_NO_VEX(s);
3902                 rm = (modrm & 7);
3903                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3904                                  offsetof(CPUX86State, fpregs[rm].mmx));
3905                 gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
3906             }
3907             reg = ((modrm >> 3) & 7) | REX_R(s);
3908             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3909             break;
3910 
3911         case 0x138:
3912         case 0x038:
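            /*
             * The byte loaded as "modrm" above is really the third
             * opcode byte of a 0f 38 instruction; f0-ff are the integer
             * extensions handled at do_0f_38_fx.
             */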
3913             b = modrm;
3914             if ((b & 0xf0) == 0xf0) {
3915                 goto do_0f_38_fx;
3916             }
3917             modrm = x86_ldub_code(env, s);
3918             rm = modrm & 7;
3919             reg = ((modrm >> 3) & 7) | REX_R(s);
3920             mod = (modrm >> 6) & 3;
3921 
3922             assert(b1 < 2);
3923             op6 = &sse_op_table6[b];
3924             if (op6->ext_mask == 0) {
3925                 goto unknown_op;
3926             }
3927             if (!(s->cpuid_ext_features & op6->ext_mask)) {
3928                 goto illegal_op;
3929             }
3930 
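            /* b1 != 0 here means a 66 prefix: the XMM form; otherwise MMX. */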
3931             if (b1) {
3932                 op1_offset = ZMM_OFFSET(reg);
3933                 if (mod == 3) {
3934                     op2_offset = ZMM_OFFSET(rm | REX_B(s));
3935                 } else {
3936                     op2_offset = offsetof(CPUX86State,xmm_t0);
3937                     gen_lea_modrm(env, s, modrm);
3938                     switch (b) {
3939                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3940                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3941                     case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3942                         gen_ldq_env_A0(s, op2_offset +
3943                                         offsetof(ZMMReg, ZMM_Q(0)));
3944                         break;
3945                     case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3946                     case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3947                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
3948                                             s->mem_index, MO_LEUL);
3949                         tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
3950                                         offsetof(ZMMReg, ZMM_L(0)));
3951                         break;
3952                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3953                         tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
3954                                            s->mem_index, MO_LEUW);
3955                         tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
3956                                         offsetof(ZMMReg, ZMM_W(0)));
3957                         break;
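                    /* movntdqa is a plain 128-bit load into the
                       destination; there is no helper to call.  */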
3958                     case 0x2a:            /* movntdqa */
3959                         gen_ldo_env_A0(s, op1_offset, true);
3960                         return;
3961                     default:
3962                         gen_ldo_env_A0(s, op2_offset, true);
3963                     }
3964                 }
3965                 if (!op6->fn[b1].op1) {
3966                     goto illegal_op;
3967                 }
3968                 tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3969                 tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3970                 op6->fn[b1].op1(cpu_env, s->ptr0, s->ptr1);
3971             } else {
3972                 CHECK_NO_VEX(s);
3973                 if ((op6->flags & SSE_OPF_MMX) == 0) {
3974                     goto unknown_op;
3975                 }
3976                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3977                 if (mod == 3) {
3978                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3979                 } else {
3980                     op2_offset = offsetof(CPUX86State,mmx_t0);
3981                     gen_lea_modrm(env, s, modrm);
3982                     gen_ldq_env_A0(s, op2_offset);
3983                 }
3984                 tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3985                 tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3986                 op6->fn[0].op1(cpu_env, s->ptr0, s->ptr1);
3987             }
3988 
3989             if (op6->flags & SSE_OPF_CMP) {
3990                 set_cc_op(s, CC_OP_EFLAGS);
3991             }
3992             break;
3993 
3994         case 0x238:
3995         case 0x338:
3996         do_0f_38_fx:
3997             /* Various integer extensions at 0f 38 f[0-f].  */
3998             b = modrm | (b1 << 8);
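            /*
             * b now encodes (mandatory prefix << 8) | third opcode byte,
             * e.g. 0x3f0 is f2 0f 38 f0 (crc32 Gd,Eb).
             */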
3999             modrm = x86_ldub_code(env, s);
4000             reg = ((modrm >> 3) & 7) | REX_R(s);
4001 
4002             switch (b) {
4003             case 0x3f0: /* crc32 Gd,Eb */
4004             case 0x3f1: /* crc32 Gd,Ey */
4005             do_crc32:
4006                 CHECK_NO_VEX(s);
4007                 if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
4008                     goto illegal_op;
4009                 }
4010                 if ((b & 0xff) == 0xf0) {
4011                     ot = MO_8;
4012                 } else if (s->dflag != MO_64) {
4013                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
4014                 } else {
4015                     ot = MO_64;
4016                 }
4017 
4018                 tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
4019                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4020                 gen_helper_crc32(s->T0, s->tmp2_i32,
4021                                  s->T0, tcg_const_i32(8 << ot));
4022 
4023                 ot = mo_64_32(s->dflag);
4024                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4025                 break;
4026 
4027             case 0x1f0: /* crc32 or movbe */
4028             case 0x1f1:
4029                 CHECK_NO_VEX(s);
                /* For these insns, the f3 prefix is supposed to take
                   priority over the 66 prefix, which is not how b1 is
                   computed above.  */
4033                 if (s->prefix & PREFIX_REPNZ) {
4034                     goto do_crc32;
4035                 }
4036                 /* FALLTHRU */
4037             case 0x0f0: /* movbe Gy,My */
4038             case 0x0f1: /* movbe My,Gy */
4039                 CHECK_NO_VEX(s);
4040                 if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
4041                     goto illegal_op;
4042                 }
4043                 if (s->dflag != MO_64) {
4044                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
4045                 } else {
4046                     ot = MO_64;
4047                 }
4048 
4049                 gen_lea_modrm(env, s, modrm);
4050                 if ((b & 1) == 0) {
4051                     tcg_gen_qemu_ld_tl(s->T0, s->A0,
4052                                        s->mem_index, ot | MO_BE);
4053                     gen_op_mov_reg_v(s, ot, reg, s->T0);
4054                 } else {
4055                     tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
4056                                        s->mem_index, ot | MO_BE);
4057                 }
4058                 break;
4059 
4060             case 0x0f2: /* andn Gy, By, Ey */
4061                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4062                     || !(s->prefix & PREFIX_VEX)
4063                     || s->vex_l != 0) {
4064                     goto illegal_op;
4065                 }
4066                 ot = mo_64_32(s->dflag);
4067                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4068                 tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
4069                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4070                 gen_op_update1_cc(s);
4071                 set_cc_op(s, CC_OP_LOGICB + ot);
4072                 break;
4073 
4074             case 0x0f7: /* bextr Gy, Ey, By */
4075                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4076                     || !(s->prefix & PREFIX_VEX)
4077                     || s->vex_l != 0) {
4078                     goto illegal_op;
4079                 }
4080                 ot = mo_64_32(s->dflag);
4081                 {
4082                     TCGv bound, zero;
4083 
4084                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4085                     /* Extract START, and shift the operand.
4086                        Shifts larger than operand size get zeros.  */
4087                     tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
4088                     tcg_gen_shr_tl(s->T0, s->T0, s->A0);
4089 
4090                     bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
4091                     zero = tcg_const_tl(0);
4092                     tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
4093                                        s->T0, zero);
4094                     tcg_temp_free(zero);
4095 
4096                     /* Extract the LEN into a mask.  Lengths larger than
4097                        operand size get all ones.  */
4098                     tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
4099                     tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
4100                                        s->A0, bound);
4101                     tcg_temp_free(bound);
4102                     tcg_gen_movi_tl(s->T1, 1);
4103                     tcg_gen_shl_tl(s->T1, s->T1, s->A0);
4104                     tcg_gen_subi_tl(s->T1, s->T1, 1);
4105                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4106 
4107                     gen_op_mov_reg_v(s, ot, reg, s->T0);
4108                     gen_op_update1_cc(s);
4109                     set_cc_op(s, CC_OP_LOGICB + ot);
4110                 }
4111                 break;
4112 
4113             case 0x0f5: /* bzhi Gy, Ey, By */
4114                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4115                     || !(s->prefix & PREFIX_VEX)
4116                     || s->vex_l != 0) {
4117                     goto illegal_op;
4118                 }
4119                 ot = mo_64_32(s->dflag);
4120                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4121                 tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
4122                 {
4123                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
4124                     /* Note that since we're using BMILG (in order to get O
4125                        cleared) we need to store the inverse into C.  */
4126                     tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
4127                                        s->T1, bound);
4128                     tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
4129                                        bound, bound, s->T1);
4130                     tcg_temp_free(bound);
4131                 }
4132                 tcg_gen_movi_tl(s->A0, -1);
4133                 tcg_gen_shl_tl(s->A0, s->A0, s->T1);
4134                 tcg_gen_andc_tl(s->T0, s->T0, s->A0);
4135                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4136                 gen_op_update1_cc(s);
4137                 set_cc_op(s, CC_OP_BMILGB + ot);
4138                 break;
4139 
4140             case 0x3f6: /* mulx By, Gy, rdx, Ey */
4141                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4142                     || !(s->prefix & PREFIX_VEX)
4143                     || s->vex_l != 0) {
4144                     goto illegal_op;
4145                 }
4146                 ot = mo_64_32(s->dflag);
4147                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4148                 switch (ot) {
4149                 default:
4150                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4151                     tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
4152                     tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4153                                       s->tmp2_i32, s->tmp3_i32);
4154                     tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
4155                     tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
4156                     break;
4157 #ifdef TARGET_X86_64
4158                 case MO_64:
4159                     tcg_gen_mulu2_i64(s->T0, s->T1,
4160                                       s->T0, cpu_regs[R_EDX]);
4161                     tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
4162                     tcg_gen_mov_i64(cpu_regs[reg], s->T1);
4163                     break;
4164 #endif
4165                 }
4166                 break;
4167 
4168             case 0x3f5: /* pdep Gy, By, Ey */
4169                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4170                     || !(s->prefix & PREFIX_VEX)
4171                     || s->vex_l != 0) {
4172                     goto illegal_op;
4173                 }
4174                 ot = mo_64_32(s->dflag);
4175                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4176                 /* Note that by zero-extending the source operand, we
4177                    automatically handle zero-extending the result.  */
4178                 if (ot == MO_64) {
4179                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
4180                 } else {
4181                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
4182                 }
4183                 gen_helper_pdep(cpu_regs[reg], s->T1, s->T0);
4184                 break;
4185 
4186             case 0x2f5: /* pext Gy, By, Ey */
4187                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4188                     || !(s->prefix & PREFIX_VEX)
4189                     || s->vex_l != 0) {
4190                     goto illegal_op;
4191                 }
4192                 ot = mo_64_32(s->dflag);
4193                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4194                 /* Note that by zero-extending the source operand, we
4195                    automatically handle zero-extending the result.  */
4196                 if (ot == MO_64) {
4197                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
4198                 } else {
4199                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
4200                 }
4201                 gen_helper_pext(cpu_regs[reg], s->T1, s->T0);
4202                 break;
4203 
4204             case 0x1f6: /* adcx Gy, Ey */
4205             case 0x2f6: /* adox Gy, Ey */
4206                 CHECK_NO_VEX(s);
4207                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
4208                     goto illegal_op;
4209                 } else {
4210                     TCGv carry_in, carry_out, zero;
4211                     int end_op;
4212 
4213                     ot = mo_64_32(s->dflag);
4214                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4215 
4216                     /* Re-use the carry-out from a previous round.  */
4217                     carry_in = NULL;
4218                     carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
4219                     switch (s->cc_op) {
4220                     case CC_OP_ADCX:
4221                         if (b == 0x1f6) {
4222                             carry_in = cpu_cc_dst;
4223                             end_op = CC_OP_ADCX;
4224                         } else {
4225                             end_op = CC_OP_ADCOX;
4226                         }
4227                         break;
4228                     case CC_OP_ADOX:
4229                         if (b == 0x1f6) {
4230                             end_op = CC_OP_ADCOX;
4231                         } else {
4232                             carry_in = cpu_cc_src2;
4233                             end_op = CC_OP_ADOX;
4234                         }
4235                         break;
4236                     case CC_OP_ADCOX:
4237                         end_op = CC_OP_ADCOX;
4238                         carry_in = carry_out;
4239                         break;
4240                     default:
4241                         end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
4242                         break;
4243                     }
4244                     /* If we can't reuse carry-out, get it out of EFLAGS.  */
4245                     if (!carry_in) {
4246                         if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
4247                             gen_compute_eflags(s);
4248                         }
4249                         carry_in = s->tmp0;
4250                         tcg_gen_extract_tl(carry_in, cpu_cc_src,
4251                                            ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
4252                     }
4253 
4254                     switch (ot) {
4255 #ifdef TARGET_X86_64
4256                     case MO_32:
4257                         /* If we know TL is 64-bit, and we want a 32-bit
4258                            result, just do everything in 64-bit arithmetic.  */
4259                         tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
4260                         tcg_gen_ext32u_i64(s->T0, s->T0);
4261                         tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
4262                         tcg_gen_add_i64(s->T0, s->T0, carry_in);
4263                         tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
4264                         tcg_gen_shri_i64(carry_out, s->T0, 32);
4265                         break;
4266 #endif
4267                     default:
4268                         /* Otherwise compute the carry-out in two steps.  */
4269                         zero = tcg_const_tl(0);
4270                         tcg_gen_add2_tl(s->T0, carry_out,
4271                                         s->T0, zero,
4272                                         carry_in, zero);
4273                         tcg_gen_add2_tl(cpu_regs[reg], carry_out,
4274                                         cpu_regs[reg], carry_out,
4275                                         s->T0, zero);
4276                         tcg_temp_free(zero);
4277                         break;
4278                     }
4279                     set_cc_op(s, end_op);
4280                 }
4281                 break;
4282 
4283             case 0x1f7: /* shlx Gy, Ey, By */
4284             case 0x2f7: /* sarx Gy, Ey, By */
4285             case 0x3f7: /* shrx Gy, Ey, By */
4286                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4287                     || !(s->prefix & PREFIX_VEX)
4288                     || s->vex_l != 0) {
4289                     goto illegal_op;
4290                 }
4291                 ot = mo_64_32(s->dflag);
4292                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4293                 if (ot == MO_64) {
4294                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
4295                 } else {
4296                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
4297                 }
4298                 if (b == 0x1f7) {
4299                     tcg_gen_shl_tl(s->T0, s->T0, s->T1);
4300                 } else if (b == 0x2f7) {
4301                     if (ot != MO_64) {
4302                         tcg_gen_ext32s_tl(s->T0, s->T0);
4303                     }
4304                     tcg_gen_sar_tl(s->T0, s->T0, s->T1);
4305                 } else {
4306                     if (ot != MO_64) {
4307                         tcg_gen_ext32u_tl(s->T0, s->T0);
4308                     }
4309                     tcg_gen_shr_tl(s->T0, s->T0, s->T1);
4310                 }
4311                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4312                 break;
4313 
4314             case 0x0f3:
4315             case 0x1f3:
4316             case 0x2f3:
4317             case 0x3f3: /* Group 17 */
4318                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4319                     || !(s->prefix & PREFIX_VEX)
4320                     || s->vex_l != 0) {
4321                     goto illegal_op;
4322                 }
4323                 ot = mo_64_32(s->dflag);
4324                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4325 
4326                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
4327                 switch (reg & 7) {
4328                 case 1: /* blsr By,Ey */
4329                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4330                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4331                     break;
4332                 case 2: /* blsmsk By,Ey */
4333                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4334                     tcg_gen_xor_tl(s->T0, s->T0, s->T1);
4335                     break;
4336                 case 3: /* blsi By, Ey */
4337                     tcg_gen_neg_tl(s->T1, s->T0);
4338                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4339                     break;
4340                 default:
4341                     goto unknown_op;
4342                 }
4343                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4344                 gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
4345                 set_cc_op(s, CC_OP_BMILGB + ot);
4346                 break;
4347 
4348             default:
4349                 goto unknown_op;
4350             }
4351             break;
4352 
4353         case 0x03a:
4354         case 0x13a:
4355             b = modrm;
4356             modrm = x86_ldub_code(env, s);
4357             rm = modrm & 7;
4358             reg = ((modrm >> 3) & 7) | REX_R(s);
4359             mod = (modrm >> 6) & 3;
4360 
4361             assert(b1 < 2);
4362             op7 = &sse_op_table7[b];
4363             if (op7->ext_mask == 0) {
4364                 goto unknown_op;
4365             }
4366             if (!(s->cpuid_ext_features & op7->ext_mask)) {
4367                 goto illegal_op;
4368             }
4369 
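            /* Every 0f 3a op takes a trailing imm8; note it so that
               RIP-relative addressing stays correct.  */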
4370             s->rip_offset = 1;
4371 
4372             if (op7->flags & SSE_OPF_SPECIAL) {
4373                 /* None of the "special" ops are valid on mmx registers */
4374                 if (b1 == 0) {
4375                     goto illegal_op;
4376                 }
4377                 ot = mo_64_32(s->dflag);
4378                 rm = (modrm & 7) | REX_B(s);
                if (mod != 3) {
                    gen_lea_modrm(env, s, modrm);
                }
4381                 reg = ((modrm >> 3) & 7) | REX_R(s);
4382                 val = x86_ldub_code(env, s);
4383                 switch (b) {
4384                 case 0x14: /* pextrb */
4385                     tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4386                                             xmm_regs[reg].ZMM_B(val & 15)));
4387                     if (mod == 3) {
4388                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4389                     } else {
4390                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4391                                            s->mem_index, MO_UB);
4392                     }
4393                     break;
4394                 case 0x15: /* pextrw */
4395                     tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4396                                             xmm_regs[reg].ZMM_W(val & 7)));
4397                     if (mod == 3) {
4398                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4399                     } else {
4400                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4401                                            s->mem_index, MO_LEUW);
4402                     }
4403                     break;
4404                 case 0x16:
4405                     if (ot == MO_32) { /* pextrd */
4406                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4407                                         offsetof(CPUX86State,
4408                                                 xmm_regs[reg].ZMM_L(val & 3)));
4409                         if (mod == 3) {
4410                             tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
4411                         } else {
4412                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
4413                                                 s->mem_index, MO_LEUL);
4414                         }
4415                     } else { /* pextrq */
4416 #ifdef TARGET_X86_64
4417                         tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
4418                                         offsetof(CPUX86State,
4419                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4420                         if (mod == 3) {
4421                             tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
4422                         } else {
4423                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
4424                                                 s->mem_index, MO_LEUQ);
4425                         }
4426 #else
4427                         goto illegal_op;
4428 #endif
4429                     }
4430                     break;
4431                 case 0x17: /* extractps */
4432                     tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4433                                             xmm_regs[reg].ZMM_L(val & 3)));
4434                     if (mod == 3) {
4435                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4436                     } else {
4437                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4438                                            s->mem_index, MO_LEUL);
4439                     }
4440                     break;
4441                 case 0x20: /* pinsrb */
4442                     if (mod == 3) {
4443                         gen_op_mov_v_reg(s, MO_32, s->T0, rm);
4444                     } else {
4445                         tcg_gen_qemu_ld_tl(s->T0, s->A0,
4446                                            s->mem_index, MO_UB);
4447                     }
4448                     tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
4449                                             xmm_regs[reg].ZMM_B(val & 15)));
4450                     break;
4451                 case 0x21: /* insertps */
4452                     if (mod == 3) {
4453                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4454                                         offsetof(CPUX86State,xmm_regs[rm]
4455                                                 .ZMM_L((val >> 6) & 3)));
4456                     } else {
4457                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4458                                             s->mem_index, MO_LEUL);
4459                     }
4460                     tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4461                                     offsetof(CPUX86State,xmm_regs[reg]
4462                                             .ZMM_L((val >> 4) & 3)));
                    if ((val >> 0) & 1) {
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                       cpu_env, offsetof(CPUX86State,
                                               xmm_regs[reg].ZMM_L(0)));
                    }
                    if ((val >> 1) & 1) {
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                       cpu_env, offsetof(CPUX86State,
                                               xmm_regs[reg].ZMM_L(1)));
                    }
                    if ((val >> 2) & 1) {
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                       cpu_env, offsetof(CPUX86State,
                                               xmm_regs[reg].ZMM_L(2)));
                    }
                    if ((val >> 3) & 1) {
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                       cpu_env, offsetof(CPUX86State,
                                               xmm_regs[reg].ZMM_L(3)));
                    }
4479                     break;
4480                 case 0x22:
4481                     if (ot == MO_32) { /* pinsrd */
4482                         if (mod == 3) {
4483                             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
4484                         } else {
4485                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4486                                                 s->mem_index, MO_LEUL);
4487                         }
4488                         tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4489                                         offsetof(CPUX86State,
4490                                                 xmm_regs[reg].ZMM_L(val & 3)));
4491                     } else { /* pinsrq */
4492 #ifdef TARGET_X86_64
4493                         if (mod == 3) {
4494                             gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
4495                         } else {
4496                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
4497                                                 s->mem_index, MO_LEUQ);
4498                         }
4499                         tcg_gen_st_i64(s->tmp1_i64, cpu_env,
4500                                         offsetof(CPUX86State,
4501                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4502 #else
4503                         goto illegal_op;
4504 #endif
4505                     }
4506                     break;
4507                 }
4508                 return;
4509             }
4510 
4511             if (b1 == 0) {
4512                 CHECK_NO_VEX(s);
4513                 /* MMX */
4514                 if ((op7->flags & SSE_OPF_MMX) == 0) {
4515                     goto illegal_op;
4516                 }
4517                 op1_offset = offsetof(CPUX86State, fpregs[reg].mmx);
4518                 if (mod == 3) {
4519                     op2_offset = offsetof(CPUX86State, fpregs[rm].mmx);
4520                 } else {
4521                     op2_offset = offsetof(CPUX86State, mmx_t0);
4522                     gen_lea_modrm(env, s, modrm);
4523                     gen_ldq_env_A0(s, op2_offset);
4524                 }
4525                 val = x86_ldub_code(env, s);
4526                 tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4527                 tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4528 
4529                 /* We only actually have one MMX instruction (palignr) */
4530                 assert(b == 0x0f);
4531 
4532                 op7->fn[0].op1(cpu_env, s->ptr0, s->ptr1,
4533                                tcg_const_i32(val));
4534                 break;
4535             }
4536 
4537             /* SSE */
4538             op1_offset = ZMM_OFFSET(reg);
4539             if (mod == 3) {
4540                 op2_offset = ZMM_OFFSET(rm | REX_B(s));
4541             } else {
4542                 op2_offset = offsetof(CPUX86State, xmm_t0);
4543                 gen_lea_modrm(env, s, modrm);
4544                 gen_ldo_env_A0(s, op2_offset, true);
4545             }
4546 
4547             val = x86_ldub_code(env, s);
4548             if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4549                 set_cc_op(s, CC_OP_EFLAGS);
4550 
4551                 if (s->dflag == MO_64) {
4552                     /* The helper must use entire 64-bit gp registers */
4553                     val |= 1 << 8;
4554                 }
4555             }
4556 
4557             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4558             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4559             op7->fn[b1].op1(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
4560             if (op7->flags & SSE_OPF_CMP) {
4561                 set_cc_op(s, CC_OP_EFLAGS);
4562             }
4563             break;
4564 
4565         case 0x33a:
4566             /* Various integer extensions at 0f 3a f[0-f].  */
4567             b = modrm | (b1 << 8);
4568             modrm = x86_ldub_code(env, s);
4569             reg = ((modrm >> 3) & 7) | REX_R(s);
4570 
4571             switch (b) {
4572             case 0x3f0: /* rorx Gy,Ey, Ib */
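                 /* RORX (BMI2) rotates right by an immediate count
                without reading or writing EFLAGS, unlike ROR. */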
4573                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4574                     || !(s->prefix & PREFIX_VEX)
4575                     || s->vex_l != 0) {
4576                     goto illegal_op;
4577                 }
4578                 ot = mo_64_32(s->dflag);
4579                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4580                 b = x86_ldub_code(env, s);
4581                 if (ot == MO_64) {
4582                     tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
4583                 } else {
4584                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4585                     tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
4586                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
4587                 }
4588                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4589                 break;
4590 
4591             default:
4592                 goto unknown_op;
4593             }
4594             break;
4595 
4596         default:
4597         unknown_op:
4598             gen_unknown_opcode(env, s);
4599             return;
4600         }
4601     } else {
4602         /* generic MMX or SSE operation */
4603         switch(b) {
4604         case 0x70: /* pshufx insn */
4605         case 0xc6: /* shufp[sd] insn */
4606         case 0xc2: /* compare insns */
4607             s->rip_offset = 1;
4608             break;
4609         default:
4610             break;
4611         }
4612         if (is_xmm) {
4613             op1_offset = ZMM_OFFSET(reg);
4614             if (mod != 3) {
4615                 int sz = 4;
4616 
4617                 gen_lea_modrm(env, s, modrm);
4618                 op2_offset = offsetof(CPUX86State, xmm_t0);
4619 
4620                 if (sse_op_flags & SSE_OPF_SCALAR) {
4621                     if (sse_op_flags & SSE_OPF_CMP) {
4622                         /* ucomis[sd], comis[sd] */
4623                         if (b1 == 0) {
4624                             sz = 2;
4625                         } else {
4626                             sz = 3;
4627                         }
4628                     } else {
4629                         /* Most sse scalar operations.  */
4630                         if (b1 == 2) {
4631                             sz = 2;
4632                         } else if (b1 == 3) {
4633                             sz = 3;
4634                         }
4635                     }
4636                 }
4637 
4638                 switch (sz) {
4639                 case 2:
4640                     /* 32 bit access */
4641                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
4642                     tcg_gen_st32_tl(s->T0, cpu_env,
4643                                     offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
4644                     break;
4645                 case 3:
4646                     /* 64 bit access */
4647                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4648                     break;
4649                 default:
4650                     /* 128 bit access */
4651                     gen_ldo_env_A0(s, op2_offset, true);
4652                     break;
4653                 }
4654             } else {
4655                 rm = (modrm & 7) | REX_B(s);
4656                 op2_offset = ZMM_OFFSET(rm);
4657             }
4658         } else {
4659             CHECK_NO_VEX(s);
4660             op1_offset = offsetof(CPUX86State, fpregs[reg].mmx);
4661             if (mod != 3) {
4662                 gen_lea_modrm(env, s, modrm);
4663                 op2_offset = offsetof(CPUX86State, mmx_t0);
4664                 gen_ldq_env_A0(s, op2_offset);
4665             } else {
4666                 rm = (modrm & 7);
4667                 op2_offset = offsetof(CPUX86State, fpregs[rm].mmx);
4668             }
4669             if (sse_op_flags & SSE_OPF_3DNOW) {
4670                 /* 3DNow! data insns */
4671                 val = x86_ldub_code(env, s);
4672                 SSEFunc_0_epp op_3dnow = sse_op_table5[val];
4673                 if (!op_3dnow) {
4674                     goto unknown_op;
4675                 }
4676                 tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4677                 tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4678                 op_3dnow(cpu_env, s->ptr0, s->ptr1);
4679                 return;
4680             }
4681         }
4682         tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4683         tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4684         if (sse_op_flags & SSE_OPF_SHUF) {
4685             val = x86_ldub_code(env, s);
4686             sse_op_fn.op1i(s->ptr0, s->ptr1, tcg_const_i32(val));
4687         } else if (b == 0xf7) {
4688             /* maskmov: the store goes to DS:rDI, so A0 must be prepared by hand */
4689             if (mod != 3) {
4690                 goto illegal_op;
4691             }
4692             tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4693             gen_extu(s->aflag, s->A0);
4694             gen_add_A0_ds_seg(s);
4695             sse_op_fn.op1t(cpu_env, s->ptr0, s->ptr1, s->A0);
4696         } else if (b == 0xc2) {
4697             /* compare insns, bits 7:3 (7:5 for AVX) are ignored */
4698             val = x86_ldub_code(env, s) & 7;
4699             sse_op_table4[val][b1](cpu_env, s->ptr0, s->ptr1);
4700         } else {
4701             sse_op_fn.op1(cpu_env, s->ptr0, s->ptr1);
4702         }
4703 
4704         if (sse_op_flags & SSE_OPF_CMP) {
4705             set_cc_op(s, CC_OP_EFLAGS);
4706         }
4707     }
4708 }
4709 
4710 /* Convert one instruction.  s->base.is_jmp is set if the translation must
4711    be stopped.  Returns the next pc value.  */
4712 static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4713 {
4714     CPUX86State *env = cpu->env_ptr;
4715     int b, prefixes;
4716     int shift;
4717     MemOp ot, aflag, dflag;
4718     int modrm, reg, rm, mod, op, opreg, val;
4719     target_ulong next_eip, tval;
4720     target_ulong pc_start = s->base.pc_next;
4721     bool orig_cc_op_dirty = s->cc_op_dirty;
4722     CCOp orig_cc_op = s->cc_op;
4723 
4724     s->pc_start = s->pc = pc_start;
4725     s->override = -1;
4726 #ifdef TARGET_X86_64
4727     s->rex_w = false;
4728     s->rex_r = 0;
4729     s->rex_x = 0;
4730     s->rex_b = 0;
4731 #endif
4732     s->rip_offset = 0; /* for relative ip address */
4733     s->vex_l = 0;
4734     s->vex_v = 0;
4735     switch (sigsetjmp(s->jmpbuf, 0)) {
4736     case 0:
4737         break;
4738     case 1:
4739         gen_exception_gpf(s);
4740         return s->pc;
4741     case 2:
4742         /* Restore state that may affect the next instruction. */
4743         s->cc_op_dirty = orig_cc_op_dirty;
4744         s->cc_op = orig_cc_op;
4745         s->base.num_insns--;
4746         tcg_remove_ops_after(s->prev_insn_end);
4747         s->base.is_jmp = DISAS_TOO_MANY;
4748         return pc_start;
4749     default:
4750         g_assert_not_reached();
4751     }
4752 
4753     prefixes = 0;
4754 
4755  next_byte:
4756     b = x86_ldub_code(env, s);
4757     /* Collect prefixes.  */
4758     switch (b) {
4759     case 0xf3:
4760         prefixes |= PREFIX_REPZ;
4761         prefixes &= ~PREFIX_REPNZ;
4762         goto next_byte;
4763     case 0xf2:
4764         prefixes |= PREFIX_REPNZ;
4765         prefixes &= ~PREFIX_REPZ;
4766         goto next_byte;
4767     case 0xf0:
4768         prefixes |= PREFIX_LOCK;
4769         goto next_byte;
4770     case 0x2e:
4771         s->override = R_CS;
4772         goto next_byte;
4773     case 0x36:
4774         s->override = R_SS;
4775         goto next_byte;
4776     case 0x3e:
4777         s->override = R_DS;
4778         goto next_byte;
4779     case 0x26:
4780         s->override = R_ES;
4781         goto next_byte;
4782     case 0x64:
4783         s->override = R_FS;
4784         goto next_byte;
4785     case 0x65:
4786         s->override = R_GS;
4787         goto next_byte;
4788     case 0x66:
4789         prefixes |= PREFIX_DATA;
4790         goto next_byte;
4791     case 0x67:
4792         prefixes |= PREFIX_ADR;
4793         goto next_byte;
4794 #ifdef TARGET_X86_64
4795     case 0x40 ... 0x4f:
4796         if (CODE64(s)) {
4797             /* REX prefix */
4798             prefixes |= PREFIX_REX;
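             /* Store REX.R/X/B pre-shifted to bit 3 so that REX_R(),
            REX_X() and REX_B() can be OR'ed directly into modrm
            register numbers. */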
4799             s->rex_w = (b >> 3) & 1;
4800             s->rex_r = (b & 0x4) << 1;
4801             s->rex_x = (b & 0x2) << 2;
4802             s->rex_b = (b & 0x1) << 3;
4803             goto next_byte;
4804         }
4805         break;
4806 #endif
4807     case 0xc5: /* 2-byte VEX */
4808     case 0xc4: /* 3-byte VEX */
4809         /* VEX prefixes are only recognized in 32-bit and 64-bit mode;
4810            otherwise the bytes decode as LES or LDS.  */
4811         if (CODE32(s) && !VM86(s)) {
4812             static const int pp_prefix[4] = {
4813                 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4814             };
4815             int vex3, vex2 = x86_ldub_code(env, s);
4816 
4817             if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4818                 /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4819                    otherwise the instruction is LES or LDS.  */
4820                 s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
4821                 break;
4822             }
4823 
4824             /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4825             if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4826                             | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
4827                 goto illegal_op;
4828             }
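             /* VEX encodes the R/X/B bits and the vvvv field inverted
            (1's complement), hence the bitwise NOTs below. */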
4829 #ifdef TARGET_X86_64
4830             s->rex_r = (~vex2 >> 4) & 8;
4831 #endif
4832             if (b == 0xc5) {
4833                 /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4834                 vex3 = vex2;
4835                 b = x86_ldub_code(env, s) | 0x100;
4836             } else {
4837                 /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4838                 vex3 = x86_ldub_code(env, s);
4839 #ifdef TARGET_X86_64
4840                 s->rex_x = (~vex2 >> 3) & 8;
4841                 s->rex_b = (~vex2 >> 2) & 8;
4842                 s->rex_w = (vex3 >> 7) & 1;
4843 #endif
4844                 switch (vex2 & 0x1f) {
4845                 case 0x01: /* Implied 0f leading opcode bytes.  */
4846                     b = x86_ldub_code(env, s) | 0x100;
4847                     break;
4848                 case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4849                     b = 0x138;
4850                     break;
4851                 case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4852                     b = 0x13a;
4853                     break;
4854                 default:   /* Reserved for future use.  */
4855                     goto unknown_op;
4856                 }
4857             }
4858             s->vex_v = (~vex3 >> 3) & 0xf;
4859             s->vex_l = (vex3 >> 2) & 1;
4860             prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4861         }
4862         break;
4863     }
4864 
4865     /* Post-process prefixes.  */
4866     if (CODE64(s)) {
4867         /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4868            data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4869            over 0x66 if both are present.  */
4870         dflag = (REX_W(s) ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4871         /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4872         aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4873     } else {
4874         /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4875         if (CODE32(s) ^ ((prefixes & PREFIX_DATA) != 0)) {
4876             dflag = MO_32;
4877         } else {
4878             dflag = MO_16;
4879         }
4880         /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4881         if (CODE32(s) ^ ((prefixes & PREFIX_ADR) != 0)) {
4882             aflag = MO_32;
4883         }  else {
4884             aflag = MO_16;
4885         }
4886     }
4887 
4888     s->prefix = prefixes;
4889     s->aflag = aflag;
4890     s->dflag = dflag;
4891 
4892     /* now check op code */
4893  reswitch:
4894     switch(b) {
4895     case 0x0f:
4896         /**************************/
4897         /* extended op code */
4898         b = x86_ldub_code(env, s) | 0x100;
4899         goto reswitch;
4900 
4901         /**************************/
4902         /* arith & logic */
4903     case 0x00 ... 0x05:
4904     case 0x08 ... 0x0d:
4905     case 0x10 ... 0x15:
4906     case 0x18 ... 0x1d:
4907     case 0x20 ... 0x25:
4908     case 0x28 ... 0x2d:
4909     case 0x30 ... 0x35:
4910     case 0x38 ... 0x3d:
4911         {
4912             int op, f, val;
4913             op = (b >> 3) & 7;
4914             f = (b >> 1) & 3;
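             /* Opcodes 0x00-0x3d: bits 5:3 select the ALU op (ADD..CMP),
            bits 2:1 select the operand form handled below
            (Ev,Gv / Gv,Ev / acc,imm), bit 0 byte vs full size. */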
4915 
4916             ot = mo_b_d(b, dflag);
4917 
4918             switch(f) {
4919             case 0: /* OP Ev, Gv */
4920                 modrm = x86_ldub_code(env, s);
4921                 reg = ((modrm >> 3) & 7) | REX_R(s);
4922                 mod = (modrm >> 6) & 3;
4923                 rm = (modrm & 7) | REX_B(s);
4924                 if (mod != 3) {
4925                     gen_lea_modrm(env, s, modrm);
4926                     opreg = OR_TMP0;
4927                 } else if (op == OP_XORL && rm == reg) {
4928                 xor_zero:
4929                     /* xor reg, reg optimisation */
4930                     set_cc_op(s, CC_OP_CLR);
4931                     tcg_gen_movi_tl(s->T0, 0);
4932                     gen_op_mov_reg_v(s, ot, reg, s->T0);
4933                     break;
4934                 } else {
4935                     opreg = rm;
4936                 }
4937                 gen_op_mov_v_reg(s, ot, s->T1, reg);
4938                 gen_op(s, op, ot, opreg);
4939                 break;
4940             case 1: /* OP Gv, Ev */
4941                 modrm = x86_ldub_code(env, s);
4942                 mod = (modrm >> 6) & 3;
4943                 reg = ((modrm >> 3) & 7) | REX_R(s);
4944                 rm = (modrm & 7) | REX_B(s);
4945                 if (mod != 3) {
4946                     gen_lea_modrm(env, s, modrm);
4947                     gen_op_ld_v(s, ot, s->T1, s->A0);
4948                 } else if (op == OP_XORL && rm == reg) {
4949                     goto xor_zero;
4950                 } else {
4951                     gen_op_mov_v_reg(s, ot, s->T1, rm);
4952                 }
4953                 gen_op(s, op, ot, reg);
4954                 break;
4955             case 2: /* OP A, Iv */
4956                 val = insn_get(env, s, ot);
4957                 tcg_gen_movi_tl(s->T1, val);
4958                 gen_op(s, op, ot, OR_EAX);
4959                 break;
4960             }
4961         }
4962         break;
4963 
4964     case 0x82:
4965         if (CODE64(s))
4966             goto illegal_op;
4967         /* fall through */
4968     case 0x80: /* GRP1 */
4969     case 0x81:
4970     case 0x83:
4971         {
4972             int val;
4973 
4974             ot = mo_b_d(b, dflag);
4975 
4976             modrm = x86_ldub_code(env, s);
4977             mod = (modrm >> 6) & 3;
4978             rm = (modrm & 7) | REX_B(s);
4979             op = (modrm >> 3) & 7;
4980 
4981             if (mod != 3) {
4982                 if (b == 0x83)
4983                     s->rip_offset = 1;
4984                 else
4985                     s->rip_offset = insn_const_size(ot);
4986                 gen_lea_modrm(env, s, modrm);
4987                 opreg = OR_TMP0;
4988             } else {
4989                 opreg = rm;
4990             }
4991 
4992             switch(b) {
4993             default:
4994             case 0x80:
4995             case 0x81:
4996             case 0x82:
4997                 val = insn_get(env, s, ot);
4998                 break;
4999             case 0x83:
5000                 val = (int8_t)insn_get(env, s, MO_8);
5001                 break;
5002             }
5003             tcg_gen_movi_tl(s->T1, val);
5004             gen_op(s, op, ot, opreg);
5005         }
5006         break;
5007 
5008         /**************************/
5009         /* inc, dec, and other misc arith */
5010     case 0x40 ... 0x47: /* inc Gv */
5011         ot = dflag;
5012         gen_inc(s, ot, OR_EAX + (b & 7), 1);
5013         break;
5014     case 0x48 ... 0x4f: /* dec Gv */
5015         ot = dflag;
5016         gen_inc(s, ot, OR_EAX + (b & 7), -1);
5017         break;
5018     case 0xf6: /* GRP3 */
5019     case 0xf7:
5020         ot = mo_b_d(b, dflag);
5021 
5022         modrm = x86_ldub_code(env, s);
5023         mod = (modrm >> 6) & 3;
5024         rm = (modrm & 7) | REX_B(s);
5025         op = (modrm >> 3) & 7;
5026         if (mod != 3) {
5027             if (op == 0) {
5028                 s->rip_offset = insn_const_size(ot);
5029             }
5030             gen_lea_modrm(env, s, modrm);
5031             /* For those below that handle locked memory, don't load here.  */
5032             if (!(s->prefix & PREFIX_LOCK)
5033                 || op != 2) {
5034                 gen_op_ld_v(s, ot, s->T0, s->A0);
5035             }
5036         } else {
5037             gen_op_mov_v_reg(s, ot, s->T0, rm);
5038         }
5039 
5040         switch(op) {
5041         case 0: /* test */
5042             val = insn_get(env, s, ot);
5043             tcg_gen_movi_tl(s->T1, val);
5044             gen_op_testl_T0_T1_cc(s);
5045             set_cc_op(s, CC_OP_LOGICB + ot);
5046             break;
5047         case 2: /* not */
5048             if (s->prefix & PREFIX_LOCK) {
5049                 if (mod == 3) {
5050                     goto illegal_op;
5051                 }
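                 /* Implement locked NOT as an atomic XOR with all-ones
                (x ^ -1 == ~x). */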
5052                 tcg_gen_movi_tl(s->T0, ~0);
5053                 tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
5054                                             s->mem_index, ot | MO_LE);
5055             } else {
5056                 tcg_gen_not_tl(s->T0, s->T0);
5057                 if (mod != 3) {
5058                     gen_op_st_v(s, ot, s->T0, s->A0);
5059                 } else {
5060                     gen_op_mov_reg_v(s, ot, rm, s->T0);
5061                 }
5062             }
5063             break;
5064         case 3: /* neg */
5065             if (s->prefix & PREFIX_LOCK) {
5066                 TCGLabel *label1;
5067                 TCGv a0, t0, t1, t2;
5068 
5069                 if (mod == 3) {
5070                     goto illegal_op;
5071                 }
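                 /* Locked NEG is a compare-and-swap loop, i.e.
                 *   do { old = *p; } while (!CAS(p, old, -old));
                 * retrying until no other agent modified the word
                 * between the load and the cmpxchg. */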
5072                 a0 = tcg_temp_local_new();
5073                 t0 = tcg_temp_local_new();
5074                 label1 = gen_new_label();
5075 
5076                 tcg_gen_mov_tl(a0, s->A0);
5077                 tcg_gen_mov_tl(t0, s->T0);
5078 
5079                 gen_set_label(label1);
5080                 t1 = tcg_temp_new();
5081                 t2 = tcg_temp_new();
5082                 tcg_gen_mov_tl(t2, t0);
5083                 tcg_gen_neg_tl(t1, t0);
5084                 tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
5085                                           s->mem_index, ot | MO_LE);
5086                 tcg_temp_free(t1);
5087                 tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
5088 
5089                 tcg_temp_free(t2);
5090                 tcg_temp_free(a0);
5091                 tcg_gen_mov_tl(s->T0, t0);
5092                 tcg_temp_free(t0);
5093             } else {
5094                 tcg_gen_neg_tl(s->T0, s->T0);
5095                 if (mod != 3) {
5096                     gen_op_st_v(s, ot, s->T0, s->A0);
5097                 } else {
5098                     gen_op_mov_reg_v(s, ot, rm, s->T0);
5099                 }
5100             }
5101             gen_op_update_neg_cc(s);
5102             set_cc_op(s, CC_OP_SUBB + ot);
5103             break;
5104         case 4: /* mul */
5105             switch(ot) {
5106             case MO_8:
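                 /* MUL r/m8: AX = AL * r/m8.  CF/OF are set when the
                high byte of the result (kept in cc_src) is nonzero. */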
5107                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
5108                 tcg_gen_ext8u_tl(s->T0, s->T0);
5109                 tcg_gen_ext8u_tl(s->T1, s->T1);
5110                 /* XXX: use 32 bit mul which could be faster */
5111                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5112                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5113                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5114                 tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
5115                 set_cc_op(s, CC_OP_MULB);
5116                 break;
5117             case MO_16:
5118                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
5119                 tcg_gen_ext16u_tl(s->T0, s->T0);
5120                 tcg_gen_ext16u_tl(s->T1, s->T1);
5121                 /* XXX: use 32 bit mul which could be faster */
5122                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5123                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5124                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5125                 tcg_gen_shri_tl(s->T0, s->T0, 16);
5126                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5127                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
5128                 set_cc_op(s, CC_OP_MULW);
5129                 break;
5130             default:
5131             case MO_32:
5132                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5133                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
5134                 tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
5135                                   s->tmp2_i32, s->tmp3_i32);
5136                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
5137                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
5138                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5139                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
5140                 set_cc_op(s, CC_OP_MULL);
5141                 break;
5142 #ifdef TARGET_X86_64
5143             case MO_64:
5144                 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
5145                                   s->T0, cpu_regs[R_EAX]);
5146                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5147                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
5148                 set_cc_op(s, CC_OP_MULQ);
5149                 break;
5150 #endif
5151             }
5152             break;
5153         case 5: /* imul */
5154             switch(ot) {
5155             case MO_8:
5156                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
5157                 tcg_gen_ext8s_tl(s->T0, s->T0);
5158                 tcg_gen_ext8s_tl(s->T1, s->T1);
5159                 /* XXX: use 32 bit mul which could be faster */
5160                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5161                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5162                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5163                 tcg_gen_ext8s_tl(s->tmp0, s->T0);
5164                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5165                 set_cc_op(s, CC_OP_MULB);
5166                 break;
5167             case MO_16:
5168                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
5169                 tcg_gen_ext16s_tl(s->T0, s->T0);
5170                 tcg_gen_ext16s_tl(s->T1, s->T1);
5171                 /* XXX: use 32 bit mul which could be faster */
5172                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5173                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5174                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5175                 tcg_gen_ext16s_tl(s->tmp0, s->T0);
5176                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5177                 tcg_gen_shri_tl(s->T0, s->T0, 16);
5178                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5179                 set_cc_op(s, CC_OP_MULW);
5180                 break;
5181             default:
5182             case MO_32:
5183                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5184                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
5185                 tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5186                                   s->tmp2_i32, s->tmp3_i32);
5187                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
5188                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
5189                 tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5190                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5191                 tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5192                 tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5193                 set_cc_op(s, CC_OP_MULL);
5194                 break;
5195 #ifdef TARGET_X86_64
5196             case MO_64:
5197                 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
5198                                   s->T0, cpu_regs[R_EAX]);
5199                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5200                 tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
5201                 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
5202                 set_cc_op(s, CC_OP_MULQ);
5203                 break;
5204 #endif
5205             }
5206             break;
5207         case 6: /* div */
5208             switch(ot) {
5209             case MO_8:
5210                 gen_helper_divb_AL(cpu_env, s->T0);
5211                 break;
5212             case MO_16:
5213                 gen_helper_divw_AX(cpu_env, s->T0);
5214                 break;
5215             default:
5216             case MO_32:
5217                 gen_helper_divl_EAX(cpu_env, s->T0);
5218                 break;
5219 #ifdef TARGET_X86_64
5220             case MO_64:
5221                 gen_helper_divq_EAX(cpu_env, s->T0);
5222                 break;
5223 #endif
5224             }
5225             break;
5226         case 7: /* idiv */
5227             switch(ot) {
5228             case MO_8:
5229                 gen_helper_idivb_AL(cpu_env, s->T0);
5230                 break;
5231             case MO_16:
5232                 gen_helper_idivw_AX(cpu_env, s->T0);
5233                 break;
5234             default:
5235             case MO_32:
5236                 gen_helper_idivl_EAX(cpu_env, s->T0);
5237                 break;
5238 #ifdef TARGET_X86_64
5239             case MO_64:
5240                 gen_helper_idivq_EAX(cpu_env, s->T0);
5241                 break;
5242 #endif
5243             }
5244             break;
5245         default:
5246             goto unknown_op;
5247         }
5248         break;
5249 
5250     case 0xfe: /* GRP4 */
5251     case 0xff: /* GRP5 */
5252         ot = mo_b_d(b, dflag);
5253 
5254         modrm = x86_ldub_code(env, s);
5255         mod = (modrm >> 6) & 3;
5256         rm = (modrm & 7) | REX_B(s);
5257         op = (modrm >> 3) & 7;
5258         if (op >= 2 && b == 0xfe) {
5259             goto unknown_op;
5260         }
5261         if (CODE64(s)) {
5262             if (op == 2 || op == 4) {
5263                 /* operand size for jumps is 64 bit */
5264                 ot = MO_64;
5265             } else if (op == 3 || op == 5) {
5266                 ot = dflag != MO_16 ? MO_32 + REX_W(s) : MO_16;
5267             } else if (op == 6) {
5268                 /* default push size is 64 bit */
5269                 ot = mo_pushpop(s, dflag);
5270             }
5271         }
5272         if (mod != 3) {
5273             gen_lea_modrm(env, s, modrm);
5274             if (op >= 2 && op != 3 && op != 5)
5275                 gen_op_ld_v(s, ot, s->T0, s->A0);
5276         } else {
5277             gen_op_mov_v_reg(s, ot, s->T0, rm);
5278         }
5279 
5280         switch(op) {
5281         case 0: /* inc Ev */
5282             if (mod != 3)
5283                 opreg = OR_TMP0;
5284             else
5285                 opreg = rm;
5286             gen_inc(s, ot, opreg, 1);
5287             break;
5288         case 1: /* dec Ev */
5289             if (mod != 3)
5290                 opreg = OR_TMP0;
5291             else
5292                 opreg = rm;
5293             gen_inc(s, ot, opreg, -1);
5294             break;
5295         case 2: /* call Ev */
5296             /* XXX: the 'and' (zero-extension) is unnecessary for memory operands */
5297             if (dflag == MO_16) {
5298                 tcg_gen_ext16u_tl(s->T0, s->T0);
5299             }
5300             next_eip = s->pc - s->cs_base;
5301             tcg_gen_movi_tl(s->T1, next_eip);
5302             gen_push_v(s, s->T1);
5303             gen_op_jmp_v(s->T0);
5304             gen_bnd_jmp(s);
5305             gen_jr(s, s->T0);
5306             break;
5307         case 3: /* lcall Ev */
5308             if (mod == 3) {
5309                 goto illegal_op;
5310             }
5311             gen_op_ld_v(s, ot, s->T1, s->A0);
5312             gen_add_A0_im(s, 1 << ot);
5313             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5314         do_lcall:
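             /* The helpers take the operand size encoded as 0/1/2 for
            16/32/64-bit; dflag - 1 performs that conversion from MemOp. */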
5315             if (PE(s) && !VM86(s)) {
5316                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5317                 gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
5318                                            tcg_const_i32(dflag - 1),
5319                                            tcg_const_tl(s->pc - s->cs_base));
5320             } else {
5321                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5322                 gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
5323                                       tcg_const_i32(dflag - 1),
5324                                       tcg_const_i32(s->pc - s->cs_base));
5325             }
5326             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5327             gen_jr(s, s->tmp4);
5328             break;
5329         case 4: /* jmp Ev */
5330             if (dflag == MO_16) {
5331                 tcg_gen_ext16u_tl(s->T0, s->T0);
5332             }
5333             gen_op_jmp_v(s->T0);
5334             gen_bnd_jmp(s);
5335             gen_jr(s, s->T0);
5336             break;
5337         case 5: /* ljmp Ev */
5338             if (mod == 3) {
5339                 goto illegal_op;
5340             }
5341             gen_op_ld_v(s, ot, s->T1, s->A0);
5342             gen_add_A0_im(s, 1 << ot);
5343             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5344         do_ljmp:
5345             if (PE(s) && !VM86(s)) {
5346                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5347                 gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
5348                                           tcg_const_tl(s->pc - s->cs_base));
5349             } else {
5350                 gen_op_movl_seg_T0_vm(s, R_CS);
5351                 gen_op_jmp_v(s->T1);
5352             }
5353             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5354             gen_jr(s, s->tmp4);
5355             break;
5356         case 6: /* push Ev */
5357             gen_push_v(s, s->T0);
5358             break;
5359         default:
5360             goto unknown_op;
5361         }
5362         break;
5363 
5364     case 0x84: /* test Ev, Gv */
5365     case 0x85:
5366         ot = mo_b_d(b, dflag);
5367 
5368         modrm = x86_ldub_code(env, s);
5369         reg = ((modrm >> 3) & 7) | REX_R(s);
5370 
5371         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5372         gen_op_mov_v_reg(s, ot, s->T1, reg);
5373         gen_op_testl_T0_T1_cc(s);
5374         set_cc_op(s, CC_OP_LOGICB + ot);
5375         break;
5376 
5377     case 0xa8: /* test eAX, Iv */
5378     case 0xa9:
5379         ot = mo_b_d(b, dflag);
5380         val = insn_get(env, s, ot);
5381 
5382         gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
5383         tcg_gen_movi_tl(s->T1, val);
5384         gen_op_testl_T0_T1_cc(s);
5385         set_cc_op(s, CC_OP_LOGICB + ot);
5386         break;
5387 
5388     case 0x98: /* CWDE/CBW */
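         /* Sign-extend the low half of the accumulator in place:
        CBW (AL->AX), CWDE (AX->EAX), CDQE (EAX->RAX). */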
5389         switch (dflag) {
5390 #ifdef TARGET_X86_64
5391         case MO_64:
5392             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5393             tcg_gen_ext32s_tl(s->T0, s->T0);
5394             gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
5395             break;
5396 #endif
5397         case MO_32:
5398             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5399             tcg_gen_ext16s_tl(s->T0, s->T0);
5400             gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
5401             break;
5402         case MO_16:
5403             gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
5404             tcg_gen_ext8s_tl(s->T0, s->T0);
5405             gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5406             break;
5407         default:
5408             tcg_abort();
5409         }
5410         break;
5411     case 0x99: /* CDQ/CWD */
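         /* Replicate the accumulator's sign bit into the data register:
        CWD (AX->DX:AX), CDQ (EAX->EDX:EAX), CQO (RAX->RDX:RAX). */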
5412         switch (dflag) {
5413 #ifdef TARGET_X86_64
5414         case MO_64:
5415             gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
5416             tcg_gen_sari_tl(s->T0, s->T0, 63);
5417             gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
5418             break;
5419 #endif
5420         case MO_32:
5421             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5422             tcg_gen_ext32s_tl(s->T0, s->T0);
5423             tcg_gen_sari_tl(s->T0, s->T0, 31);
5424             gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
5425             break;
5426         case MO_16:
5427             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5428             tcg_gen_ext16s_tl(s->T0, s->T0);
5429             tcg_gen_sari_tl(s->T0, s->T0, 15);
5430             gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5431             break;
5432         default:
5433             tcg_abort();
5434         }
5435         break;
5436     case 0x1af: /* imul Gv, Ev */
5437     case 0x69: /* imul Gv, Ev, I */
5438     case 0x6b:
5439         ot = dflag;
5440         modrm = x86_ldub_code(env, s);
5441         reg = ((modrm >> 3) & 7) | REX_R(s);
5442         if (b == 0x69)
5443             s->rip_offset = insn_const_size(ot);
5444         else if (b == 0x6b)
5445             s->rip_offset = 1;
5446         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5447         if (b == 0x69) {
5448             val = insn_get(env, s, ot);
5449             tcg_gen_movi_tl(s->T1, val);
5450         } else if (b == 0x6b) {
5451             val = (int8_t)insn_get(env, s, MO_8);
5452             tcg_gen_movi_tl(s->T1, val);
5453         } else {
5454             gen_op_mov_v_reg(s, ot, s->T1, reg);
5455         }
5456         switch (ot) {
5457 #ifdef TARGET_X86_64
5458         case MO_64:
5459             tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
5460             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5461             tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5462             tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
5463             break;
5464 #endif
5465         case MO_32:
5466             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5467             tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
5468             tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5469                               s->tmp2_i32, s->tmp3_i32);
5470             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
5471             tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5472             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5473             tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5474             tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5475             break;
5476         default:
5477             tcg_gen_ext16s_tl(s->T0, s->T0);
5478             tcg_gen_ext16s_tl(s->T1, s->T1);
5479             /* XXX: use 32 bit mul which could be faster */
5480             tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5481             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5482             tcg_gen_ext16s_tl(s->tmp0, s->T0);
5483             tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5484             gen_op_mov_reg_v(s, ot, reg, s->T0);
5485             break;
5486         }
5487         set_cc_op(s, CC_OP_MULB + ot);
5488         break;
5489     case 0x1c0:
5490     case 0x1c1: /* xadd Ev, Gv */
5491         ot = mo_b_d(b, dflag);
5492         modrm = x86_ldub_code(env, s);
5493         reg = ((modrm >> 3) & 7) | REX_R(s);
5494         mod = (modrm >> 6) & 3;
5495         gen_op_mov_v_reg(s, ot, s->T0, reg);
5496         if (mod == 3) {
5497             rm = (modrm & 7) | REX_B(s);
5498             gen_op_mov_v_reg(s, ot, s->T1, rm);
5499             tcg_gen_add_tl(s->T0, s->T0, s->T1);
5500             gen_op_mov_reg_v(s, ot, reg, s->T1);
5501             gen_op_mov_reg_v(s, ot, rm, s->T0);
5502         } else {
5503             gen_lea_modrm(env, s, modrm);
5504             if (s->prefix & PREFIX_LOCK) {
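                 /* LOCK XADD: one atomic fetch-and-add.  T1 receives the
                old memory value, which is written back to the source
                register below. */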
5505                 tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
5506                                             s->mem_index, ot | MO_LE);
5507                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5508             } else {
5509                 gen_op_ld_v(s, ot, s->T1, s->A0);
5510                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5511                 gen_op_st_v(s, ot, s->T0, s->A0);
5512             }
5513             gen_op_mov_reg_v(s, ot, reg, s->T1);
5514         }
5515         gen_op_update2_cc(s);
5516         set_cc_op(s, CC_OP_ADDB + ot);
5517         break;
5518     case 0x1b0:
5519     case 0x1b1: /* cmpxchg Ev, Gv */
5520         {
5521             TCGv oldv, newv, cmpv;
5522 
5523             ot = mo_b_d(b, dflag);
5524             modrm = x86_ldub_code(env, s);
5525             reg = ((modrm >> 3) & 7) | REX_R(s);
5526             mod = (modrm >> 6) & 3;
5527             oldv = tcg_temp_new();
5528             newv = tcg_temp_new();
5529             cmpv = tcg_temp_new();
5530             gen_op_mov_v_reg(s, ot, newv, reg);
5531             tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5532 
5533             if (s->prefix & PREFIX_LOCK) {
5534                 if (mod == 3) {
5535                     goto illegal_op;
5536                 }
5537                 gen_lea_modrm(env, s, modrm);
5538                 tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
5539                                           s->mem_index, ot | MO_LE);
5540                 gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5541             } else {
5542                 if (mod == 3) {
5543                     rm = (modrm & 7) | REX_B(s);
5544                     gen_op_mov_v_reg(s, ot, oldv, rm);
5545                 } else {
5546                     gen_lea_modrm(env, s, modrm);
5547                     gen_op_ld_v(s, ot, oldv, s->A0);
5548                     rm = 0; /* avoid warning */
5549                 }
5550                 gen_extu(ot, oldv);
5551                 gen_extu(ot, cmpv);
5552                 /* store value = (old == cmp ? new : old);  */
5553                 tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5554                 if (mod == 3) {
5555                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5556                     gen_op_mov_reg_v(s, ot, rm, newv);
5557                 } else {
5558                     /* Perform an unconditional store cycle like a physical
5559                        CPU; it must come before the accumulator is updated,
5560                        so that the instruction is idempotent if the store
5561                        faults and the instruction is restarted */
5562                     gen_op_st_v(s, ot, newv, s->A0);
5563                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5564                 }
5565             }
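             /* CMPXCHG sets the flags as CMP accumulator, r/m would:
            cc_dst = cmpv - oldv, evaluated with CC_OP_SUB below. */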
5566             tcg_gen_mov_tl(cpu_cc_src, oldv);
5567             tcg_gen_mov_tl(s->cc_srcT, cmpv);
5568             tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5569             set_cc_op(s, CC_OP_SUBB + ot);
5570             tcg_temp_free(oldv);
5571             tcg_temp_free(newv);
5572             tcg_temp_free(cmpv);
5573         }
5574         break;
5575     case 0x1c7: /* cmpxchg8b */
5576         modrm = x86_ldub_code(env, s);
5577         mod = (modrm >> 6) & 3;
5578         switch ((modrm >> 3) & 7) {
5579         case 1: /* CMPXCHG8, CMPXCHG16 */
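             /* CMPXCHG8B/16B compare EDX:EAX (RDX:RAX) with the memory
            operand and, on a match, store ECX:EBX (RCX:RBX); only ZF
            is meaningful in the resulting flags. */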
5580             if (mod == 3) {
5581                 goto illegal_op;
5582             }
5583 #ifdef TARGET_X86_64
5584             if (dflag == MO_64) {
5585                 if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
5586                     goto illegal_op;
5587                 }
5588                 gen_lea_modrm(env, s, modrm);
5589                 if ((s->prefix & PREFIX_LOCK) &&
5590                     (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5591                     gen_helper_cmpxchg16b(cpu_env, s->A0);
5592                 } else {
5593                     gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
5594                 }
5595                 set_cc_op(s, CC_OP_EFLAGS);
5596                 break;
5597             }
5598 #endif
5599             if (!(s->cpuid_features & CPUID_CX8)) {
5600                 goto illegal_op;
5601             }
5602             gen_lea_modrm(env, s, modrm);
5603             if ((s->prefix & PREFIX_LOCK) &&
5604                 (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5605                 gen_helper_cmpxchg8b(cpu_env, s->A0);
5606             } else {
5607                 gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
5608             }
5609             set_cc_op(s, CC_OP_EFLAGS);
5610             break;
5611 
5612         case 7: /* RDSEED */
5613         case 6: /* RDRAND */
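             /* RDSEED (reg 7) is handled exactly like RDRAND here,
            sharing the helper and the RDRAND CPUID gate. */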
5614             if (mod != 3 ||
5615                 (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
5616                 !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) {
5617                 goto illegal_op;
5618             }
5619             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5620                 gen_io_start();
5621             }
5622             gen_helper_rdrand(s->T0, cpu_env);
5623             rm = (modrm & 7) | REX_B(s);
5624             gen_op_mov_reg_v(s, dflag, rm, s->T0);
5625             set_cc_op(s, CC_OP_EFLAGS);
5626             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5627                 gen_jmp(s, s->pc - s->cs_base);
5628             }
5629             break;
5630 
5631         default:
5632             goto illegal_op;
5633         }
5634         break;
5635 
5636         /**************************/
5637         /* push/pop */
5638     case 0x50 ... 0x57: /* push */
5639         gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
5640         gen_push_v(s, s->T0);
5641         break;
5642     case 0x58 ... 0x5f: /* pop */
5643         ot = gen_pop_T0(s);
5644         /* NOTE: order is important for pop %sp */
5645         gen_pop_update(s, ot);
5646         gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
5647         break;
5648     case 0x60: /* pusha */
5649         if (CODE64(s))
5650             goto illegal_op;
5651         gen_pusha(s);
5652         break;
5653     case 0x61: /* popa */
5654         if (CODE64(s))
5655             goto illegal_op;
5656         gen_popa(s);
5657         break;
5658     case 0x68: /* push Iv */
5659     case 0x6a:
5660         ot = mo_pushpop(s, dflag);
5661         if (b == 0x68)
5662             val = insn_get(env, s, ot);
5663         else
5664             val = (int8_t)insn_get(env, s, MO_8);
5665         tcg_gen_movi_tl(s->T0, val);
5666         gen_push_v(s, s->T0);
5667         break;
5668     case 0x8f: /* pop Ev */
5669         modrm = x86_ldub_code(env, s);
5670         mod = (modrm >> 6) & 3;
5671         ot = gen_pop_T0(s);
5672         if (mod == 3) {
5673             /* NOTE: order is important for pop %sp */
5674             gen_pop_update(s, ot);
5675             rm = (modrm & 7) | REX_B(s);
5676             gen_op_mov_reg_v(s, ot, rm, s->T0);
5677         } else {
5678             /* NOTE: order is important too for MMU exceptions */
5679             s->popl_esp_hack = 1 << ot;
5680             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5681             s->popl_esp_hack = 0;
5682             gen_pop_update(s, ot);
5683         }
5684         break;
5685     case 0xc8: /* enter */
5686         {
5687             int level;
5688             val = x86_lduw_code(env, s);
5689             level = x86_ldub_code(env, s);
5690             gen_enter(s, val, level);
5691         }
5692         break;
5693     case 0xc9: /* leave */
5694         gen_leave(s);
5695         break;
5696     case 0x06: /* push es */
5697     case 0x0e: /* push cs */
5698     case 0x16: /* push ss */
5699     case 0x1e: /* push ds */
5700         if (CODE64(s))
5701             goto illegal_op;
5702         gen_op_movl_T0_seg(s, b >> 3);
5703         gen_push_v(s, s->T0);
5704         break;
5705     case 0x1a0: /* push fs */
5706     case 0x1a8: /* push gs */
5707         gen_op_movl_T0_seg(s, (b >> 3) & 7);
5708         gen_push_v(s, s->T0);
5709         break;
5710     case 0x07: /* pop es */
5711     case 0x17: /* pop ss */
5712     case 0x1f: /* pop ds */
5713         if (CODE64(s))
5714             goto illegal_op;
5715         reg = b >> 3;
5716         ot = gen_pop_T0(s);
5717         gen_movl_seg_T0(s, reg);
5718         gen_pop_update(s, ot);
5719         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5720         if (s->base.is_jmp) {
5721             gen_jmp_im(s, s->pc - s->cs_base);
5722             if (reg == R_SS) {
5723                 s->flags &= ~HF_TF_MASK;
5724                 gen_eob_inhibit_irq(s, true);
5725             } else {
5726                 gen_eob(s);
5727             }
5728         }
5729         break;
5730     case 0x1a1: /* pop fs */
5731     case 0x1a9: /* pop gs */
5732         ot = gen_pop_T0(s);
5733         gen_movl_seg_T0(s, (b >> 3) & 7);
5734         gen_pop_update(s, ot);
5735         if (s->base.is_jmp) {
5736             gen_jmp_im(s, s->pc - s->cs_base);
5737             gen_eob(s);
5738         }
5739         break;
5740 
5741         /**************************/
5742         /* mov */
5743     case 0x88:
5744     case 0x89: /* mov Gv, Ev */
5745         ot = mo_b_d(b, dflag);
5746         modrm = x86_ldub_code(env, s);
5747         reg = ((modrm >> 3) & 7) | REX_R(s);
5748 
5749         /* generate a generic store */
5750         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5751         break;
5752     case 0xc6:
5753     case 0xc7: /* mov Ev, Iv */
5754         ot = mo_b_d(b, dflag);
5755         modrm = x86_ldub_code(env, s);
5756         mod = (modrm >> 6) & 3;
5757         if (mod != 3) {
5758             s->rip_offset = insn_const_size(ot);
5759             gen_lea_modrm(env, s, modrm);
5760         }
5761         val = insn_get(env, s, ot);
5762         tcg_gen_movi_tl(s->T0, val);
5763         if (mod != 3) {
5764             gen_op_st_v(s, ot, s->T0, s->A0);
5765         } else {
5766             gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
5767         }
5768         break;
5769     case 0x8a:
5770     case 0x8b: /* mov Ev, Gv */
5771         ot = mo_b_d(b, dflag);
5772         modrm = x86_ldub_code(env, s);
5773         reg = ((modrm >> 3) & 7) | REX_R(s);
5774 
5775         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5776         gen_op_mov_reg_v(s, ot, reg, s->T0);
5777         break;
5778     case 0x8e: /* mov seg, Gv */
5779         modrm = x86_ldub_code(env, s);
5780         reg = (modrm >> 3) & 7;
5781         if (reg >= 6 || reg == R_CS)
5782             goto illegal_op;
5783         gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5784         gen_movl_seg_T0(s, reg);
5785         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5786         if (s->base.is_jmp) {
5787             gen_jmp_im(s, s->pc - s->cs_base);
5788             if (reg == R_SS) {
5789                 s->flags &= ~HF_TF_MASK;
5790                 gen_eob_inhibit_irq(s, true);
5791             } else {
5792                 gen_eob(s);
5793             }
5794         }
5795         break;
5796     case 0x8c: /* mov Gv, seg */
5797         modrm = x86_ldub_code(env, s);
5798         reg = (modrm >> 3) & 7;
5799         mod = (modrm >> 6) & 3;
5800         if (reg >= 6)
5801             goto illegal_op;
5802         gen_op_movl_T0_seg(s, reg);
5803         ot = mod == 3 ? dflag : MO_16;
5804         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5805         break;
5806 
5807     case 0x1b6: /* movzbS Gv, Eb */
5808     case 0x1b7: /* movzwS Gv, Eb */
5809     case 0x1be: /* movsbS Gv, Eb */
5810     case 0x1bf: /* movswS Gv, Eb */
5811         {
5812             MemOp d_ot;
5813             MemOp s_ot;
5814 
5815             /* d_ot is the size of destination */
5816             d_ot = dflag;
5817             /* ot is the size of source */
5818             ot = (b & 1) + MO_8;
5819             /* s_ot is the sign+size of source */
5820             s_ot = b & 8 ? MO_SIGN | ot : ot;
5821 
5822             modrm = x86_ldub_code(env, s);
5823             reg = ((modrm >> 3) & 7) | REX_R(s);
5824             mod = (modrm >> 6) & 3;
5825             rm = (modrm & 7) | REX_B(s);
5826 
5827             if (mod == 3) {
5828                 if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
5829                     tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
5830                 } else {
5831                     gen_op_mov_v_reg(s, ot, s->T0, rm);
5832                     switch (s_ot) {
5833                     case MO_UB:
5834                         tcg_gen_ext8u_tl(s->T0, s->T0);
5835                         break;
5836                     case MO_SB:
5837                         tcg_gen_ext8s_tl(s->T0, s->T0);
5838                         break;
5839                     case MO_UW:
5840                         tcg_gen_ext16u_tl(s->T0, s->T0);
5841                         break;
5842                     default:
5843                     case MO_SW:
5844                         tcg_gen_ext16s_tl(s->T0, s->T0);
5845                         break;
5846                     }
5847                 }
5848                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5849             } else {
5850                 gen_lea_modrm(env, s, modrm);
5851                 gen_op_ld_v(s, s_ot, s->T0, s->A0);
5852                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5853             }
5854         }
5855         break;
5856 
5857     case 0x8d: /* lea */
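         /* LEA only computes the effective address: no segment base is
        added (seg argument -1 below) and no memory access is generated. */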
5858         modrm = x86_ldub_code(env, s);
5859         mod = (modrm >> 6) & 3;
5860         if (mod == 3)
5861             goto illegal_op;
5862         reg = ((modrm >> 3) & 7) | REX_R(s);
5863         {
5864             AddressParts a = gen_lea_modrm_0(env, s, modrm);
5865             TCGv ea = gen_lea_modrm_1(s, a);
5866             gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5867             gen_op_mov_reg_v(s, dflag, reg, s->A0);
5868         }
5869         break;
5870 
5871     case 0xa0: /* mov EAX, Ov */
5872     case 0xa1:
5873     case 0xa2: /* mov Ov, EAX */
5874     case 0xa3:
5875         {
5876             target_ulong offset_addr;
5877 
5878             ot = mo_b_d(b, dflag);
5879             offset_addr = insn_get_addr(env, s, s->aflag);
5880             tcg_gen_movi_tl(s->A0, offset_addr);
5881             gen_add_A0_ds_seg(s);
5882             if ((b & 2) == 0) {
5883                 gen_op_ld_v(s, ot, s->T0, s->A0);
5884                 gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
5885             } else {
5886                 gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
5887                 gen_op_st_v(s, ot, s->T0, s->A0);
5888             }
5889         }
5890         break;
5891     case 0xd7: /* xlat */
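         /* XLAT: AL = [(E)BX + zero-extended AL], DS segment by default
        (subject to a segment override prefix). */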
5892         tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
5893         tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
5894         tcg_gen_add_tl(s->A0, s->A0, s->T0);
5895         gen_extu(s->aflag, s->A0);
5896         gen_add_A0_ds_seg(s);
5897         gen_op_ld_v(s, MO_8, s->T0, s->A0);
5898         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
5899         break;
5900     case 0xb0 ... 0xb7: /* mov R, Ib */
5901         val = insn_get(env, s, MO_8);
5902         tcg_gen_movi_tl(s->T0, val);
5903         gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
5904         break;
5905     case 0xb8 ... 0xbf: /* mov R, Iv */
5906 #ifdef TARGET_X86_64
5907         if (dflag == MO_64) {
5908             uint64_t tmp;
5909             /* 64 bit case */
5910             tmp = x86_ldq_code(env, s);
5911             reg = (b & 7) | REX_B(s);
5912             tcg_gen_movi_tl(s->T0, tmp);
5913             gen_op_mov_reg_v(s, MO_64, reg, s->T0);
5914         } else
5915 #endif
5916         {
5917             ot = dflag;
5918             val = insn_get(env, s, ot);
5919             reg = (b & 7) | REX_B(s);
5920             tcg_gen_movi_tl(s->T0, val);
5921             gen_op_mov_reg_v(s, ot, reg, s->T0);
5922         }
5923         break;
5924 
5925     case 0x91 ... 0x97: /* xchg R, EAX */
5926     do_xchg_reg_eax:
5927         ot = dflag;
5928         reg = (b & 7) | REX_B(s);
5929         rm = R_EAX;
5930         goto do_xchg_reg;
5931     case 0x86:
5932     case 0x87: /* xchg Ev, Gv */
5933         ot = mo_b_d(b, dflag);
5934         modrm = x86_ldub_code(env, s);
5935         reg = ((modrm >> 3) & 7) | REX_R(s);
5936         mod = (modrm >> 6) & 3;
5937         if (mod == 3) {
5938             rm = (modrm & 7) | REX_B(s);
5939         do_xchg_reg:
5940             gen_op_mov_v_reg(s, ot, s->T0, reg);
5941             gen_op_mov_v_reg(s, ot, s->T1, rm);
5942             gen_op_mov_reg_v(s, ot, rm, s->T0);
5943             gen_op_mov_reg_v(s, ot, reg, s->T1);
5944         } else {
5945             gen_lea_modrm(env, s, modrm);
5946             gen_op_mov_v_reg(s, ot, s->T0, reg);
5947             /* for xchg, lock is implicit */
5948             tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
5949                                    s->mem_index, ot | MO_LE);
5950             gen_op_mov_reg_v(s, ot, reg, s->T1);
5951         }
5952         break;
5953     case 0xc4: /* les Gv */
5954         /* In CODE64 this is VEX3; see above.  */
5955         op = R_ES;
5956         goto do_lxx;
5957     case 0xc5: /* lds Gv */
5958         /* In CODE64 this is VEX2; see above.  */
5959         op = R_DS;
5960         goto do_lxx;
5961     case 0x1b2: /* lss Gv */
5962         op = R_SS;
5963         goto do_lxx;
5964     case 0x1b4: /* lfs Gv */
5965         op = R_FS;
5966         goto do_lxx;
5967     case 0x1b5: /* lgs Gv */
5968         op = R_GS;
5969     do_lxx:
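             /*
              * lds/les/lss/lfs/lgs load a far pointer: the offset lives at
              * the effective address and the 16-bit selector right after it.
              * E.g. with a 32-bit operand size, "lds (%esi), %eax" reads EAX
              * from [ESI] and the DS selector from [ESI + 4].
              */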
5970         ot = dflag != MO_16 ? MO_32 : MO_16;
5971         modrm = x86_ldub_code(env, s);
5972         reg = ((modrm >> 3) & 7) | REX_R(s);
5973         mod = (modrm >> 6) & 3;
5974         if (mod == 3)
5975             goto illegal_op;
5976         gen_lea_modrm(env, s, modrm);
5977         gen_op_ld_v(s, ot, s->T1, s->A0);
5978         gen_add_A0_im(s, 1 << ot);
5979         /* load the segment first to handle exceptions properly */
5980         gen_op_ld_v(s, MO_16, s->T0, s->A0);
5981         gen_movl_seg_T0(s, op);
5982         /* then put the data */
5983         gen_op_mov_reg_v(s, ot, reg, s->T1);
5984         if (s->base.is_jmp) {
5985             gen_jmp_im(s, s->pc - s->cs_base);
5986             gen_eob(s);
5987         }
5988         break;
5989 
5990         /************************/
5991         /* shifts */
5992     case 0xc0:
5993     case 0xc1:
5994         /* shift Ev,Ib */
5995         shift = 2;
5996     grp2:
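             /*
              * Common decode for the group-2 shift/rotate forms.  The modrm
              * reg field selects the operation (0=rol, 1=ror, 2=rcl, 3=rcr,
              * 4=shl, 5=shr, 7=sar), while "shift" encodes the count source:
              * 0 = CL (d2/d3), 1 = the constant 1 (d0/d1), 2 = imm8 (c0/c1).
              * rip_offset accounts for the trailing immediate when forming a
              * RIP-relative address.
              */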
5997         {
5998             ot = mo_b_d(b, dflag);
5999             modrm = x86_ldub_code(env, s);
6000             mod = (modrm >> 6) & 3;
6001             op = (modrm >> 3) & 7;
6002 
6003             if (mod != 3) {
6004                 if (shift == 2) {
6005                     s->rip_offset = 1;
6006                 }
6007                 gen_lea_modrm(env, s, modrm);
6008                 opreg = OR_TMP0;
6009             } else {
6010                 opreg = (modrm & 7) | REX_B(s);
6011             }
6012 
6013             /* dispatch on the shift-count source: CL, imm8, or the constant 1 */
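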
6014             if (shift == 0) {
6015                 gen_shift(s, op, ot, opreg, OR_ECX);
6016             } else {
6017                 if (shift == 2) {
6018                     shift = x86_ldub_code(env, s);
6019                 }
6020                 gen_shifti(s, op, ot, opreg, shift);
6021             }
6022         }
6023         break;
6024     case 0xd0:
6025     case 0xd1:
6026         /* shift Ev,1 */
6027         shift = 1;
6028         goto grp2;
6029     case 0xd2:
6030     case 0xd3:
6031         /* shift Ev,cl */
6032         shift = 0;
6033         goto grp2;
6034 
6035     case 0x1a4: /* shld imm */
6036         op = 0;
6037         shift = 1;
6038         goto do_shiftd;
6039     case 0x1a5: /* shld cl */
6040         op = 0;
6041         shift = 0;
6042         goto do_shiftd;
6043     case 0x1ac: /* shrd imm */
6044         op = 1;
6045         shift = 1;
6046         goto do_shiftd;
6047     case 0x1ad: /* shrd cl */
6048         op = 1;
6049         shift = 0;
6050     do_shiftd:
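             /*
              * Double-precision shifts: SHLD shifts the destination left,
              * filling vacated bits from the MSBs of the source register;
              * SHRD shifts right, filling from the LSBs.  Illustrative
              * example: with EAX=0x12345678 and EBX=0x9ABCDEF0,
              * "shld $8, %ebx, %eax" leaves EAX=0x3456789A.
              */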
6051         ot = dflag;
6052         modrm = x86_ldub_code(env, s);
6053         mod = (modrm >> 6) & 3;
6054         rm = (modrm & 7) | REX_B(s);
6055         reg = ((modrm >> 3) & 7) | REX_R(s);
6056         if (mod != 3) {
6057             gen_lea_modrm(env, s, modrm);
6058             opreg = OR_TMP0;
6059         } else {
6060             opreg = rm;
6061         }
6062         gen_op_mov_v_reg(s, ot, s->T1, reg);
6063 
6064         if (shift) {
6065             TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
6066             gen_shiftd_rm_T1(s, ot, opreg, op, imm);
6067             tcg_temp_free(imm);
6068         } else {
6069             gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
6070         }
6071         break;
6072 
6073         /************************/
6074         /* floats */
6075     case 0xd8 ... 0xdf:
6076         {
6077             bool update_fip = true;
6078 
6079             if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
6080                 /* if CR0.EM or CR0.TS is set, generate an FPU exception */
6081                 /* XXX: what should happen on an illegal op? */
6082                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
6083                 break;
6084             }
6085             modrm = x86_ldub_code(env, s);
6086             mod = (modrm >> 6) & 3;
6087             rm = modrm & 7;
6088             op = ((b & 7) << 3) | ((modrm >> 3) & 7);
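                 /*
                  * "op" packs the low three opcode bits (d8..df) with the
                  * modrm reg field into a 6-bit x87 operation index; e.g.
                  * "fldcw" (d9 /5) yields op = (1 << 3) | 5 = 0x0d, matching
                  * the case label below.
                  */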
6089             if (mod != 3) {
6090                 /* memory op */
6091                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
6092                 TCGv ea = gen_lea_modrm_1(s, a);
6093                 TCGv last_addr = tcg_temp_new();
6094                 bool update_fdp = true;
6095 
6096                 tcg_gen_mov_tl(last_addr, ea);
6097                 gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
6098 
6099                 switch (op) {
6100                 case 0x00 ... 0x07: /* fxxxs */
6101                 case 0x10 ... 0x17: /* fixxxl */
6102                 case 0x20 ... 0x27: /* fxxxl */
6103                 case 0x30 ... 0x37: /* fixxx */
6104                     {
6105                         int op1;
6106                         op1 = op & 7;
6107 
6108                         switch (op >> 4) {
6109                         case 0:
6110                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6111                                                 s->mem_index, MO_LEUL);
6112                             gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
6113                             break;
6114                         case 1:
6115                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6116                                                 s->mem_index, MO_LEUL);
6117                             gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
6118                             break;
6119                         case 2:
6120                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
6121                                                 s->mem_index, MO_LEUQ);
6122                             gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
6123                             break;
6124                         case 3:
6125                         default:
6126                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6127                                                 s->mem_index, MO_LESW);
6128                             gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
6129                             break;
6130                         }
6131 
6132                         gen_helper_fp_arith_ST0_FT0(op1);
6133                         if (op1 == 3) {
6134                             /* fcomp needs pop */
6135                             gen_helper_fpop(cpu_env);
6136                         }
6137                     }
6138                     break;
6139                 case 0x08: /* flds */
6140                 case 0x0a: /* fsts */
6141                 case 0x0b: /* fstps */
6142                 case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
6143                 case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
6144                 case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
6145                     switch (op & 7) {
6146                     case 0:
6147                         switch (op >> 4) {
6148                         case 0:
6149                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6150                                                 s->mem_index, MO_LEUL);
6151                             gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
6152                             break;
6153                         case 1:
6154                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6155                                                 s->mem_index, MO_LEUL);
6156                             gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
6157                             break;
6158                         case 2:
6159                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
6160                                                 s->mem_index, MO_LEUQ);
6161                             gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
6162                             break;
6163                         case 3:
6164                         default:
6165                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6166                                                 s->mem_index, MO_LESW);
6167                             gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
6168                             break;
6169                         }
6170                         break;
6171                     case 1:
6172                         /* XXX: the corresponding CPUID bit must be tested! */
6173                         switch (op >> 4) {
6174                         case 1:
6175                             gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
6176                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6177                                                 s->mem_index, MO_LEUL);
6178                             break;
6179                         case 2:
6180                             gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
6181                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6182                                                 s->mem_index, MO_LEUQ);
6183                             break;
6184                         case 3:
6185                         default:
6186                             gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
6187                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6188                                                 s->mem_index, MO_LEUW);
6189                             break;
6190                         }
6191                         gen_helper_fpop(cpu_env);
6192                         break;
6193                     default:
6194                         switch (op >> 4) {
6195                         case 0:
6196                             gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
6197                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6198                                                 s->mem_index, MO_LEUL);
6199                             break;
6200                         case 1:
6201                             gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
6202                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6203                                                 s->mem_index, MO_LEUL);
6204                             break;
6205                         case 2:
6206                             gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
6207                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6208                                                 s->mem_index, MO_LEUQ);
6209                             break;
6210                         case 3:
6211                         default:
6212                             gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
6213                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6214                                                 s->mem_index, MO_LEUW);
6215                             break;
6216                         }
6217                         if ((op & 7) == 3) {
6218                             gen_helper_fpop(cpu_env);
6219                         }
6220                         break;
6221                     }
6222                     break;
6223                 case 0x0c: /* fldenv mem */
6224                     gen_helper_fldenv(cpu_env, s->A0,
6225                                       tcg_const_i32(dflag - 1));
6226                     update_fip = update_fdp = false;
6227                     break;
6228                 case 0x0d: /* fldcw mem */
6229                     tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6230                                         s->mem_index, MO_LEUW);
6231                     gen_helper_fldcw(cpu_env, s->tmp2_i32);
6232                     update_fip = update_fdp = false;
6233                     break;
6234                 case 0x0e: /* fnstenv mem */
6235                     gen_helper_fstenv(cpu_env, s->A0,
6236                                       tcg_const_i32(dflag - 1));
6237                     update_fip = update_fdp = false;
6238                     break;
6239                 case 0x0f: /* fnstcw mem */
6240                     gen_helper_fnstcw(s->tmp2_i32, cpu_env);
6241                     tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6242                                         s->mem_index, MO_LEUW);
6243                     update_fip = update_fdp = false;
6244                     break;
6245                 case 0x1d: /* fldt mem */
6246                     gen_helper_fldt_ST0(cpu_env, s->A0);
6247                     break;
6248                 case 0x1f: /* fstpt mem */
6249                     gen_helper_fstt_ST0(cpu_env, s->A0);
6250                     gen_helper_fpop(cpu_env);
6251                     break;
6252                 case 0x2c: /* frstor mem */
6253                     gen_helper_frstor(cpu_env, s->A0,
6254                                       tcg_const_i32(dflag - 1));
6255                     update_fip = update_fdp = false;
6256                     break;
6257                 case 0x2e: /* fnsave mem */
6258                     gen_helper_fsave(cpu_env, s->A0,
6259                                      tcg_const_i32(dflag - 1));
6260                     update_fip = update_fdp = false;
6261                     break;
6262                 case 0x2f: /* fnstsw mem */
6263                     gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6264                     tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6265                                         s->mem_index, MO_LEUW);
6266                     update_fip = update_fdp = false;
6267                     break;
6268                 case 0x3c: /* fbld */
6269                     gen_helper_fbld_ST0(cpu_env, s->A0);
6270                     break;
6271                 case 0x3e: /* fbstp */
6272                     gen_helper_fbst_ST0(cpu_env, s->A0);
6273                     gen_helper_fpop(cpu_env);
6274                     break;
6275                 case 0x3d: /* fildll */
6276                     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
6277                                         s->mem_index, MO_LEUQ);
6278                     gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
6279                     break;
6280                 case 0x3f: /* fistpll */
6281                     gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
6282                     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6283                                         s->mem_index, MO_LEUQ);
6284                     gen_helper_fpop(cpu_env);
6285                     break;
6286                 default:
6287                     goto unknown_op;
6288                 }
6289 
6290                 if (update_fdp) {
6291                     int last_seg = s->override >= 0 ? s->override : a.def_seg;
6292 
6293                     tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
6294                                    offsetof(CPUX86State,
6295                                             segs[last_seg].selector));
6296                     tcg_gen_st16_i32(s->tmp2_i32, cpu_env,
6297                                      offsetof(CPUX86State, fpds));
6298                     tcg_gen_st_tl(last_addr, cpu_env,
6299                                   offsetof(CPUX86State, fpdp));
6300                 }
6301                 tcg_temp_free(last_addr);
6302             } else {
6303                 /* register float ops */
6304                 opreg = rm;
6305 
6306                 switch (op) {
6307                 case 0x08: /* fld sti */
6308                     gen_helper_fpush(cpu_env);
6309                     gen_helper_fmov_ST0_STN(cpu_env,
6310                                             tcg_const_i32((opreg + 1) & 7));
6311                     break;
6312                 case 0x09: /* fxchg sti */
6313                 case 0x29: /* fxchg4 sti, undocumented op */
6314                 case 0x39: /* fxchg7 sti, undocumented op */
6315                     gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
6316                     break;
6317                 case 0x0a: /* grp d9/2 */
6318                     switch (rm) {
6319                     case 0: /* fnop */
6320                         /* check exceptions (FreeBSD FPU probe) */
6321                         gen_helper_fwait(cpu_env);
6322                         update_fip = false;
6323                         break;
6324                     default:
6325                         goto unknown_op;
6326                     }
6327                     break;
6328                 case 0x0c: /* grp d9/4 */
6329                     switch (rm) {
6330                     case 0: /* fchs */
6331                         gen_helper_fchs_ST0(cpu_env);
6332                         break;
6333                     case 1: /* fabs */
6334                         gen_helper_fabs_ST0(cpu_env);
6335                         break;
6336                     case 4: /* ftst */
6337                         gen_helper_fldz_FT0(cpu_env);
6338                         gen_helper_fcom_ST0_FT0(cpu_env);
6339                         break;
6340                     case 5: /* fxam */
6341                         gen_helper_fxam_ST0(cpu_env);
6342                         break;
6343                     default:
6344                         goto unknown_op;
6345                     }
6346                     break;
6347                 case 0x0d: /* grp d9/5 */
6348                     {
6349                         switch (rm) {
6350                         case 0:
6351                             gen_helper_fpush(cpu_env);
6352                             gen_helper_fld1_ST0(cpu_env);
6353                             break;
6354                         case 1:
6355                             gen_helper_fpush(cpu_env);
6356                             gen_helper_fldl2t_ST0(cpu_env);
6357                             break;
6358                         case 2:
6359                             gen_helper_fpush(cpu_env);
6360                             gen_helper_fldl2e_ST0(cpu_env);
6361                             break;
6362                         case 3:
6363                             gen_helper_fpush(cpu_env);
6364                             gen_helper_fldpi_ST0(cpu_env);
6365                             break;
6366                         case 4:
6367                             gen_helper_fpush(cpu_env);
6368                             gen_helper_fldlg2_ST0(cpu_env);
6369                             break;
6370                         case 5:
6371                             gen_helper_fpush(cpu_env);
6372                             gen_helper_fldln2_ST0(cpu_env);
6373                             break;
6374                         case 6:
6375                             gen_helper_fpush(cpu_env);
6376                             gen_helper_fldz_ST0(cpu_env);
6377                             break;
6378                         default:
6379                             goto unknown_op;
6380                         }
6381                     }
6382                     break;
6383                 case 0x0e: /* grp d9/6 */
6384                     switch (rm) {
6385                     case 0: /* f2xm1 */
6386                         gen_helper_f2xm1(cpu_env);
6387                         break;
6388                     case 1: /* fyl2x */
6389                         gen_helper_fyl2x(cpu_env);
6390                         break;
6391                     case 2: /* fptan */
6392                         gen_helper_fptan(cpu_env);
6393                         break;
6394                     case 3: /* fpatan */
6395                         gen_helper_fpatan(cpu_env);
6396                         break;
6397                     case 4: /* fxtract */
6398                         gen_helper_fxtract(cpu_env);
6399                         break;
6400                     case 5: /* fprem1 */
6401                         gen_helper_fprem1(cpu_env);
6402                         break;
6403                     case 6: /* fdecstp */
6404                         gen_helper_fdecstp(cpu_env);
6405                         break;
6406                     default:
6407                     case 7: /* fincstp */
6408                         gen_helper_fincstp(cpu_env);
6409                         break;
6410                     }
6411                     break;
6412                 case 0x0f: /* grp d9/7 */
6413                     switch (rm) {
6414                     case 0: /* fprem */
6415                         gen_helper_fprem(cpu_env);
6416                         break;
6417                     case 1: /* fyl2xp1 */
6418                         gen_helper_fyl2xp1(cpu_env);
6419                         break;
6420                     case 2: /* fsqrt */
6421                         gen_helper_fsqrt(cpu_env);
6422                         break;
6423                     case 3: /* fsincos */
6424                         gen_helper_fsincos(cpu_env);
6425                         break;
6426                     case 5: /* fscale */
6427                         gen_helper_fscale(cpu_env);
6428                         break;
6429                     case 4: /* frndint */
6430                         gen_helper_frndint(cpu_env);
6431                         break;
6432                     case 6: /* fsin */
6433                         gen_helper_fsin(cpu_env);
6434                         break;
6435                     default:
6436                     case 7: /* fcos */
6437                         gen_helper_fcos(cpu_env);
6438                         break;
6439                     }
6440                     break;
6441                 case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6442                 case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6443                 case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6444                     {
6445                         int op1;
6446 
6447                         op1 = op & 7;
6448                         if (op >= 0x20) {
6449                             gen_helper_fp_arith_STN_ST0(op1, opreg);
6450                             if (op >= 0x30) {
6451                                 gen_helper_fpop(cpu_env);
6452                             }
6453                         } else {
6454                             gen_helper_fmov_FT0_STN(cpu_env,
6455                                                     tcg_const_i32(opreg));
6456                             gen_helper_fp_arith_ST0_FT0(op1);
6457                         }
6458                     }
6459                     break;
6460                 case 0x02: /* fcom */
6461                 case 0x22: /* fcom2, undocumented op */
6462                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6463                     gen_helper_fcom_ST0_FT0(cpu_env);
6464                     break;
6465                 case 0x03: /* fcomp */
6466                 case 0x23: /* fcomp3, undocumented op */
6467                 case 0x32: /* fcomp5, undocumented op */
6468                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6469                     gen_helper_fcom_ST0_FT0(cpu_env);
6470                     gen_helper_fpop(cpu_env);
6471                     break;
6472                 case 0x15: /* da/5 */
6473                     switch (rm) {
6474                     case 1: /* fucompp */
6475                         gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6476                         gen_helper_fucom_ST0_FT0(cpu_env);
6477                         gen_helper_fpop(cpu_env);
6478                         gen_helper_fpop(cpu_env);
6479                         break;
6480                     default:
6481                         goto unknown_op;
6482                     }
6483                     break;
6484                 case 0x1c:
6485                     switch (rm) {
6486                     case 0: /* feni (287 only, just do nop here) */
6487                         break;
6488                     case 1: /* fdisi (287 only, just do nop here) */
6489                         break;
6490                     case 2: /* fclex */
6491                         gen_helper_fclex(cpu_env);
6492                         update_fip = false;
6493                         break;
6494                     case 3: /* fninit */
6495                         gen_helper_fninit(cpu_env);
6496                         update_fip = false;
6497                         break;
6498                     case 4: /* fsetpm (287 only, just do nop here) */
6499                         break;
6500                     default:
6501                         goto unknown_op;
6502                     }
6503                     break;
6504                 case 0x1d: /* fucomi */
6505                     if (!(s->cpuid_features & CPUID_CMOV)) {
6506                         goto illegal_op;
6507                     }
6508                     gen_update_cc_op(s);
6509                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6510                     gen_helper_fucomi_ST0_FT0(cpu_env);
6511                     set_cc_op(s, CC_OP_EFLAGS);
6512                     break;
6513                 case 0x1e: /* fcomi */
6514                     if (!(s->cpuid_features & CPUID_CMOV)) {
6515                         goto illegal_op;
6516                     }
6517                     gen_update_cc_op(s);
6518                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6519                     gen_helper_fcomi_ST0_FT0(cpu_env);
6520                     set_cc_op(s, CC_OP_EFLAGS);
6521                     break;
6522                 case 0x28: /* ffree sti */
6523                     gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6524                     break;
6525                 case 0x2a: /* fst sti */
6526                     gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6527                     break;
6528                 case 0x2b: /* fstp sti */
6529                 case 0x0b: /* fstp1 sti, undocumented op */
6530                 case 0x3a: /* fstp8 sti, undocumented op */
6531                 case 0x3b: /* fstp9 sti, undocumented op */
6532                     gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6533                     gen_helper_fpop(cpu_env);
6534                     break;
6535                 case 0x2c: /* fucom st(i) */
6536                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6537                     gen_helper_fucom_ST0_FT0(cpu_env);
6538                     break;
6539                 case 0x2d: /* fucomp st(i) */
6540                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6541                     gen_helper_fucom_ST0_FT0(cpu_env);
6542                     gen_helper_fpop(cpu_env);
6543                     break;
6544                 case 0x33: /* de/3 */
6545                     switch (rm) {
6546                     case 1: /* fcompp */
6547                         gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6548                         gen_helper_fcom_ST0_FT0(cpu_env);
6549                         gen_helper_fpop(cpu_env);
6550                         gen_helper_fpop(cpu_env);
6551                         break;
6552                     default:
6553                         goto unknown_op;
6554                     }
6555                     break;
6556                 case 0x38: /* ffreep sti, undocumented op */
6557                     gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6558                     gen_helper_fpop(cpu_env);
6559                     break;
6560                 case 0x3c: /* df/4 */
6561                     switch (rm) {
6562                     case 0:
6563                         gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6564                         tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
6565                         gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
6566                         break;
6567                     default:
6568                         goto unknown_op;
6569                     }
6570                     break;
6571                 case 0x3d: /* fucomip */
6572                     if (!(s->cpuid_features & CPUID_CMOV)) {
6573                         goto illegal_op;
6574                     }
6575                     gen_update_cc_op(s);
6576                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6577                     gen_helper_fucomi_ST0_FT0(cpu_env);
6578                     gen_helper_fpop(cpu_env);
6579                     set_cc_op(s, CC_OP_EFLAGS);
6580                     break;
6581                 case 0x3e: /* fcomip */
6582                     if (!(s->cpuid_features & CPUID_CMOV)) {
6583                         goto illegal_op;
6584                     }
6585                     gen_update_cc_op(s);
6586                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6587                     gen_helper_fcomi_ST0_FT0(cpu_env);
6588                     gen_helper_fpop(cpu_env);
6589                     set_cc_op(s, CC_OP_EFLAGS);
6590                     break;
6591                 case 0x10 ... 0x13: /* fcmovxx */
6592                 case 0x18 ... 0x1b:
6593                     {
6594                         int op1;
6595                         TCGLabel *l1;
6596                         static const uint8_t fcmov_cc[8] = {
6597                             (JCC_B << 1),
6598                             (JCC_Z << 1),
6599                             (JCC_BE << 1),
6600                             (JCC_P << 1),
6601                         };
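                             /*
                              * fcmov_cc maps the x87 condition pair to a jcc
                              * code: b/e/be/u test CF, ZF, CF|ZF and PF.  The
                              * low bit computed below complements the test, so
                              * the branch skips the fmov exactly when the move
                              * condition does not hold (da = fcmovcc,
                              * db = fcmovncc).
                              */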
6602 
6603                         if (!(s->cpuid_features & CPUID_CMOV)) {
6604                             goto illegal_op;
6605                         }
6606                         op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6607                         l1 = gen_new_label();
6608                         gen_jcc1_noeob(s, op1, l1);
6609                         gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6610                         gen_set_label(l1);
6611                     }
6612                     break;
6613                 default:
6614                     goto unknown_op;
6615                 }
6616             }
6617 
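                 /*
                  * The x87 remembers FCS:FIP of the last non-control
                  * instruction executed; control instructions (fldcw,
                  * fnstenv, fclex, ...) set update_fip = false above and
                  * leave it untouched.
                  */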
6618             if (update_fip) {
6619                 tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
6620                                offsetof(CPUX86State, segs[R_CS].selector));
6621                 tcg_gen_st16_i32(s->tmp2_i32, cpu_env,
6622                                  offsetof(CPUX86State, fpcs));
6623                 tcg_gen_st_tl(tcg_constant_tl(pc_start - s->cs_base),
6624                               cpu_env, offsetof(CPUX86State, fpip));
6625             }
6626         }
6627         break;
6628         /************************/
6629         /* string ops */
6630 
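         /*
          * For the string ops, any REP prefix routes through gen_repz_*,
          * which emits its own count/termination loop; e.g. "rep movsb"
          * copies ECX bytes from DS:ESI to ES:EDI, stepping by the direction
          * flag.  For movs/stos/lods, REPZ and REPNZ behave identically.
          */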
6631     case 0xa4: /* movsS */
6632     case 0xa5:
6633         ot = mo_b_d(b, dflag);
6634         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6635             gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6636         } else {
6637             gen_movs(s, ot);
6638         }
6639         break;
6640 
6641     case 0xaa: /* stosS */
6642     case 0xab:
6643         ot = mo_b_d(b, dflag);
6644         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6645             gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6646         } else {
6647             gen_stos(s, ot);
6648         }
6649         break;
6650     case 0xac: /* lodsS */
6651     case 0xad:
6652         ot = mo_b_d(b, dflag);
6653         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6654             gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6655         } else {
6656             gen_lods(s, ot);
6657         }
6658         break;
6659     case 0xae: /* scasS */
6660     case 0xaf:
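             /*
              * scas (and cmps below) are the only string ops where REPZ and
              * REPNZ differ: the trailing 0/1 argument to gen_repz_scas
              * selects the termination test (repe stops when ZF is cleared,
              * repne when it is set).
              */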
6661         ot = mo_b_d(b, dflag);
6662         if (prefixes & PREFIX_REPNZ) {
6663             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6664         } else if (prefixes & PREFIX_REPZ) {
6665             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6666         } else {
6667             gen_scas(s, ot);
6668         }
6669         break;
6670 
6671     case 0xa6: /* cmpsS */
6672     case 0xa7:
6673         ot = mo_b_d(b, dflag);
6674         if (prefixes & PREFIX_REPNZ) {
6675             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6676         } else if (prefixes & PREFIX_REPZ) {
6677             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6678         } else {
6679             gen_cmps(s, ot);
6680         }
6681         break;
6682     case 0x6c: /* insS */
6683     case 0x6d:
6684         ot = mo_b_d32(b, dflag);
6685         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6686         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6687         if (!gen_check_io(s, ot, s->tmp2_i32,
6688                           SVM_IOIO_TYPE_MASK | SVM_IOIO_STR_MASK)) {
6689             break;
6690         }
6691         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6692             gen_io_start();
6693         }
6694         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6695             gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6696             /* jump generated by gen_repz_ins */
6697         } else {
6698             gen_ins(s, ot);
6699             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6700                 gen_jmp(s, s->pc - s->cs_base);
6701             }
6702         }
6703         break;
6704     case 0x6e: /* outsS */
6705     case 0x6f:
6706         ot = mo_b_d32(b, dflag);
6707         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6708         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6709         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_STR_MASK)) {
6710             break;
6711         }
6712         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6713             gen_io_start();
6714         }
6715         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6716             gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6717             /* jump generated by gen_repz_outs */
6718         } else {
6719             gen_outs(s, ot);
6720             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6721                 gen_jmp(s, s->pc - s->cs_base);
6722             }
6723         }
6724         break;
6725 
6726         /************************/
6727         /* port I/O */
6728 
6729     case 0xe4:
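         /*
          * e4/e5 and e6/e7 take an imm8 port number; ec/ed and ee/ef use DX.
          * gen_check_io performs the IOPL / TSS I/O-bitmap permission check
          * and the SVM intercept check, returning false when the access
          * cannot proceed.  Under icount, the access is bracketed by
          * gen_io_start and a TB-ending jump so device I/O happens at a
          * deterministic point.
          */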
6730     case 0xe5:
6731         ot = mo_b_d32(b, dflag);
6732         val = x86_ldub_code(env, s);
6733         tcg_gen_movi_i32(s->tmp2_i32, val);
6734         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6735             break;
6736         }
6737         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6738             gen_io_start();
6739         }
6740         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6741         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6742         gen_bpt_io(s, s->tmp2_i32, ot);
6743         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6744             gen_jmp(s, s->pc - s->cs_base);
6745         }
6746         break;
6747     case 0xe6:
6748     case 0xe7:
6749         ot = mo_b_d32(b, dflag);
6750         val = x86_ldub_code(env, s);
6751         tcg_gen_movi_i32(s->tmp2_i32, val);
6752         if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6753             break;
6754         }
6755         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6756             gen_io_start();
6757         }
6758         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6759         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6760         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6761         gen_bpt_io(s, s->tmp2_i32, ot);
6762         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6763             gen_jmp(s, s->pc - s->cs_base);
6764         }
6765         break;
6766     case 0xec:
6767     case 0xed:
6768         ot = mo_b_d32(b, dflag);
6769         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6770         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6771         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6772             break;
6773         }
6774         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6775             gen_io_start();
6776         }
6777         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6778         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6779         gen_bpt_io(s, s->tmp2_i32, ot);
6780         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6781             gen_jmp(s, s->pc - s->cs_base);
6782         }
6783         break;
6784     case 0xee:
6785     case 0xef:
6786         ot = mo_b_d32(b, dflag);
6787         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6788         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6789         if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6790             break;
6791         }
6792         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6793             gen_io_start();
6794         }
6795         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6796         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6797         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6798         gen_bpt_io(s, s->tmp2_i32, ot);
6799         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6800             gen_jmp(s, s->pc - s->cs_base);
6801         }
6802         break;
6803 
6804         /************************/
6805         /* control */
6806     case 0xc2: /* ret im */
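             /*
              * "ret imm16" pops the return EIP and then releases imm16 extra
              * stack bytes, as used by callee-clean conventions; e.g.
              * "ret $8" for a stdcall function with two dword arguments.
              */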
6807         val = x86_ldsw_code(env, s);
6808         ot = gen_pop_T0(s);
6809         gen_stack_update(s, val + (1 << ot));
6810         /* Note that gen_pop_T0 uses a zero-extending load.  */
6811         gen_op_jmp_v(s->T0);
6812         gen_bnd_jmp(s);
6813         gen_jr(s, s->T0);
6814         break;
6815     case 0xc3: /* ret */
6816         ot = gen_pop_T0(s);
6817         gen_pop_update(s, ot);
6818         /* Note that gen_pop_T0 uses a zero-extending load.  */
6819         gen_op_jmp_v(s->T0);
6820         gen_bnd_jmp(s);
6821         gen_jr(s, s->T0);
6822         break;
6823     case 0xca: /* lret im */
6824         val = x86_ldsw_code(env, s);
6825     do_lret:
6826         if (PE(s) && !VM86(s)) {
6827             gen_update_cc_op(s);
6828             gen_jmp_im(s, pc_start - s->cs_base);
6829             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6830                                       tcg_const_i32(val));
6831         } else {
6832             gen_stack_A0(s);
6833             /* pop offset */
6834             gen_op_ld_v(s, dflag, s->T0, s->A0);
6835             /* NOTE: keeping EIP updated is not a problem even if an
6836                exception occurs */
6837             gen_op_jmp_v(s->T0);
6838             /* pop selector */
6839             gen_add_A0_im(s, 1 << dflag);
6840             gen_op_ld_v(s, dflag, s->T0, s->A0);
6841             gen_op_movl_seg_T0_vm(s, R_CS);
6842             /* add stack offset */
6843             gen_stack_update(s, val + (2 << dflag));
6844         }
6845         gen_eob(s);
6846         break;
6847     case 0xcb: /* lret */
6848         val = 0;
6849         goto do_lret;
6850     case 0xcf: /* iret */
6851         gen_svm_check_intercept(s, SVM_EXIT_IRET);
6852         if (!PE(s) || VM86(s)) {
6853             /* real mode or vm86 mode */
6854             if (!check_vm86_iopl(s)) {
6855                 break;
6856             }
6857             gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6858         } else {
6859             gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6860                                       tcg_const_i32(s->pc - s->cs_base));
6861         }
6862         set_cc_op(s, CC_OP_EFLAGS);
6863         gen_eob(s);
6864         break;
6865     case 0xe8: /* call im */
6866         {
6867             if (dflag != MO_16) {
6868                 tval = (int32_t)insn_get(env, s, MO_32);
6869             } else {
6870                 tval = (int16_t)insn_get(env, s, MO_16);
6871             }
6872             next_eip = s->pc - s->cs_base;
6873             tval += next_eip;
6874             if (dflag == MO_16) {
6875                 tval &= 0xffff;
6876             } else if (!CODE64(s)) {
6877                 tval &= 0xffffffff;
6878             }
6879             tcg_gen_movi_tl(s->T0, next_eip);
6880             gen_push_v(s, s->T0);
6881             gen_bnd_jmp(s);
6882             gen_jmp(s, tval);
6883         }
6884         break;
6885     case 0x9a: /* lcall im */
6886         {
6887             unsigned int selector, offset;
6888 
6889             if (CODE64(s))
6890                 goto illegal_op;
6891             ot = dflag;
6892             offset = insn_get(env, s, ot);
6893             selector = insn_get(env, s, MO_16);
6894 
6895             tcg_gen_movi_tl(s->T0, selector);
6896             tcg_gen_movi_tl(s->T1, offset);
6897         }
6898         goto do_lcall;
6899     case 0xe9: /* jmp im */
6900         if (dflag != MO_16) {
6901             tval = (int32_t)insn_get(env, s, MO_32);
6902         } else {
6903             tval = (int16_t)insn_get(env, s, MO_16);
6904         }
6905         tval += s->pc - s->cs_base;
6906         if (dflag == MO_16) {
6907             tval &= 0xffff;
6908         } else if (!CODE64(s)) {
6909             tval &= 0xffffffff;
6910         }
6911         gen_bnd_jmp(s);
6912         gen_jmp(s, tval);
6913         break;
6914     case 0xea: /* ljmp im */
6915         {
6916             unsigned int selector, offset;
6917 
6918             if (CODE64(s))
6919                 goto illegal_op;
6920             ot = dflag;
6921             offset = insn_get(env, s, ot);
6922             selector = insn_get(env, s, MO_16);
6923 
6924             tcg_gen_movi_tl(s->T0, selector);
6925             tcg_gen_movi_tl(s->T1, offset);
6926         }
6927         goto do_ljmp;
6928     case 0xeb: /* jmp Jb */
6929         tval = (int8_t)insn_get(env, s, MO_8);
6930         tval += s->pc - s->cs_base;
6931         if (dflag == MO_16) {
6932             tval &= 0xffff;
6933         }
6934         gen_jmp(s, tval);
6935         break;
6936     case 0x70 ... 0x7f: /* jcc Jb */
6937         tval = (int8_t)insn_get(env, s, MO_8);
6938         goto do_jcc;
6939     case 0x180 ... 0x18f: /* jcc Jv */
6940         if (dflag != MO_16) {
6941             tval = (int32_t)insn_get(env, s, MO_32);
6942         } else {
6943             tval = (int16_t)insn_get(env, s, MO_16);
6944         }
6945     do_jcc:
6946         next_eip = s->pc - s->cs_base;
6947         tval += next_eip;
6948         if (dflag == MO_16) {
6949             tval &= 0xffff;
6950         }
6951         gen_bnd_jmp(s);
6952         gen_jcc(s, b, tval, next_eip);
6953         break;
6954 
6955     case 0x190 ... 0x19f: /* setcc Gv */
6956         modrm = x86_ldub_code(env, s);
6957         gen_setcc1(s, b, s->T0);
6958         gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6959         break;
6960     case 0x140 ... 0x14f: /* cmov Gv, Ev */
6961         if (!(s->cpuid_features & CPUID_CMOV)) {
6962             goto illegal_op;
6963         }
6964         ot = dflag;
6965         modrm = x86_ldub_code(env, s);
6966         reg = ((modrm >> 3) & 7) | REX_R(s);
6967         gen_cmovcc1(env, s, ot, b, modrm, reg);
6968         break;
6969 
6970         /************************/
6971         /* flags */
6972     case 0x9c: /* pushf */
6973         gen_svm_check_intercept(s, SVM_EXIT_PUSHF);
6974         if (check_vm86_iopl(s)) {
6975             gen_update_cc_op(s);
6976             gen_helper_read_eflags(s->T0, cpu_env);
6977             gen_push_v(s, s->T0);
6978         }
6979         break;
6980     case 0x9d: /* popf */
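             /*
              * Which EFLAGS bits POPF may modify depends on privilege:
              * CPL 0 may also change IOPL and IF; CPL <= IOPL may change IF
              * but not IOPL; otherwise neither is writable.  The three mask
              * sets below encode exactly that, each with a 16-bit variant.
              */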
6981         gen_svm_check_intercept(s, SVM_EXIT_POPF);
6982         if (check_vm86_iopl(s)) {
6983             ot = gen_pop_T0(s);
6984             if (CPL(s) == 0) {
6985                 if (dflag != MO_16) {
6986                     gen_helper_write_eflags(cpu_env, s->T0,
6987                                             tcg_const_i32((TF_MASK | AC_MASK |
6988                                                            ID_MASK | NT_MASK |
6989                                                            IF_MASK |
6990                                                            IOPL_MASK)));
6991                 } else {
6992                     gen_helper_write_eflags(cpu_env, s->T0,
6993                                             tcg_const_i32((TF_MASK | AC_MASK |
6994                                                            ID_MASK | NT_MASK |
6995                                                            IF_MASK | IOPL_MASK)
6996                                                           & 0xffff));
6997                 }
6998             } else {
6999                 if (CPL(s) <= IOPL(s)) {
7000                     if (dflag != MO_16) {
7001                         gen_helper_write_eflags(cpu_env, s->T0,
7002                                                 tcg_const_i32((TF_MASK |
7003                                                                AC_MASK |
7004                                                                ID_MASK |
7005                                                                NT_MASK |
7006                                                                IF_MASK)));
7007                     } else {
7008                         gen_helper_write_eflags(cpu_env, s->T0,
7009                                                 tcg_const_i32((TF_MASK |
7010                                                                AC_MASK |
7011                                                                ID_MASK |
7012                                                                NT_MASK |
7013                                                                IF_MASK)
7014                                                               & 0xffff));
7015                     }
7016                 } else {
7017                     if (dflag != MO_16) {
7018                         gen_helper_write_eflags(cpu_env, s->T0,
7019                                            tcg_const_i32((TF_MASK | AC_MASK |
7020                                                           ID_MASK | NT_MASK)));
7021                     } else {
7022                         gen_helper_write_eflags(cpu_env, s->T0,
7023                                            tcg_const_i32((TF_MASK | AC_MASK |
7024                                                           ID_MASK | NT_MASK)
7025                                                          & 0xffff));
7026                     }
7027                 }
7028             }
7029             gen_pop_update(s, ot);
7030             set_cc_op(s, CC_OP_EFLAGS);
7031             /* abort translation because the TF/AC flags may change */
7032             gen_jmp_im(s, s->pc - s->cs_base);
7033             gen_eob(s);
7034         }
7035         break;
7036     case 0x9e: /* sahf */
7037         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
7038             goto illegal_op;
7039         gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
7040         gen_compute_eflags(s);
7041         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
7042         tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
7043         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
7044         break;
7045     case 0x9f: /* lahf */
7046         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
7047             goto illegal_op;
7048         gen_compute_eflags(s);
7049         /* Note: gen_compute_eflags() only gives the condition codes */
7050         tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
7051         gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
7052         break;
7053     case 0xf5: /* cmc */
7054         gen_compute_eflags(s);
7055         tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
7056         break;
7057     case 0xf8: /* clc */
7058         gen_compute_eflags(s);
7059         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
7060         break;
7061     case 0xf9: /* stc */
7062         gen_compute_eflags(s);
7063         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
7064         break;
7065     case 0xfc: /* cld */
7066         tcg_gen_movi_i32(s->tmp2_i32, 1);
7067         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
7068         break;
7069     case 0xfd: /* std */
7070         tcg_gen_movi_i32(s->tmp2_i32, -1);
7071         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
7072         break;
7073 
7074         /************************/
7075         /* bit operations */
7076     case 0x1ba: /* bt/bts/btr/btc Gv, im */
7077         ot = dflag;
7078         modrm = x86_ldub_code(env, s);
7079         op = (modrm >> 3) & 7;
7080         mod = (modrm >> 6) & 3;
7081         rm = (modrm & 7) | REX_B(s);
7082         if (mod != 3) {
7083             s->rip_offset = 1;
7084             gen_lea_modrm(env, s, modrm);
7085             if (!(s->prefix & PREFIX_LOCK)) {
7086                 gen_op_ld_v(s, ot, s->T0, s->A0);
7087             }
7088         } else {
7089             gen_op_mov_v_reg(s, ot, s->T0, rm);
7090         }
7091         /* load the bit-offset immediate */
7092         val = x86_ldub_code(env, s);
7093         tcg_gen_movi_tl(s->T1, val);
7094         if (op < 4)
7095             goto unknown_op;
7096         op -= 4;
7097         goto bt_op;
7098     case 0x1a3: /* bt Gv, Ev */
7099         op = 0;
7100         goto do_btx;
7101     case 0x1ab: /* bts */
7102         op = 1;
7103         goto do_btx;
7104     case 0x1b3: /* btr */
7105         op = 2;
7106         goto do_btx;
7107     case 0x1bb: /* btc */
7108         op = 3;
7109     do_btx:
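             /*
              * For bt/bts/btr/btc with a register bit offset and a memory
              * operand, the offset is not masked: its high bits become a
              * byte displacement (computed below), so e.g.
              * "bts %eax, (%rdi)" with EAX=100 sets bit 100 & 31 = 4 of the
              * dword at 12(%rdi).
              */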
7110         ot = dflag;
7111         modrm = x86_ldub_code(env, s);
7112         reg = ((modrm >> 3) & 7) | REX_R(s);
7113         mod = (modrm >> 6) & 3;
7114         rm = (modrm & 7) | REX_B(s);
7115         gen_op_mov_v_reg(s, MO_32, s->T1, reg);
7116         if (mod != 3) {
7117             AddressParts a = gen_lea_modrm_0(env, s, modrm);
7118             /* special case: derive a byte displacement from the bit offset */
7119             gen_exts(ot, s->T1);
7120             tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
7121             tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
7122             tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
7123             gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7124             if (!(s->prefix & PREFIX_LOCK)) {
7125                 gen_op_ld_v(s, ot, s->T0, s->A0);
7126             }
7127         } else {
7128             gen_op_mov_v_reg(s, ot, s->T0, rm);
7129         }
7130     bt_op:
7131         tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
7132         tcg_gen_movi_tl(s->tmp0, 1);
7133         tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
7134         if (s->prefix & PREFIX_LOCK) {
7135             switch (op) {
7136             case 0: /* bt */
7137                 /* Needs no atomic ops; we suppressed the normal
7138                    memory load for LOCK above, so do it now.  */
7139                 gen_op_ld_v(s, ot, s->T0, s->A0);
7140                 break;
7141             case 1: /* bts */
7142                 tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
7143                                            s->mem_index, ot | MO_LE);
7144                 break;
7145             case 2: /* btr */
7146                 tcg_gen_not_tl(s->tmp0, s->tmp0);
7147                 tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
7148                                             s->mem_index, ot | MO_LE);
7149                 break;
7150             default:
7151             case 3: /* btc */
7152                 tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
7153                                             s->mem_index, ot | MO_LE);
7154                 break;
7155             }
7156             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
7157         } else {
7158             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
7159             switch (op) {
7160             case 0: /* bt */
7161                 /* Data already loaded; nothing to do.  */
7162                 break;
7163             case 1: /* bts */
7164                 tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
7165                 break;
7166             case 2: /* btr */
7167                 tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
7168                 break;
7169             default:
7170             case 3: /* btc */
7171                 tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
7172                 break;
7173             }
7174             if (op != 0) {
7175                 if (mod != 3) {
7176                     gen_op_st_v(s, ot, s->T0, s->A0);
7177                 } else {
7178                     gen_op_mov_reg_v(s, ot, rm, s->T0);
7179                 }
7180             }
7181         }
7182 
7183         /* Delay all CC updates until after the store above.  Note that
7184            C is the result of the test, Z is unchanged, and the others
7185            are all undefined.  */
7186         switch (s->cc_op) {
7187         case CC_OP_MULB ... CC_OP_MULQ:
7188         case CC_OP_ADDB ... CC_OP_ADDQ:
7189         case CC_OP_ADCB ... CC_OP_ADCQ:
7190         case CC_OP_SUBB ... CC_OP_SUBQ:
7191         case CC_OP_SBBB ... CC_OP_SBBQ:
7192         case CC_OP_LOGICB ... CC_OP_LOGICQ:
7193         case CC_OP_INCB ... CC_OP_INCQ:
7194         case CC_OP_DECB ... CC_OP_DECQ:
7195         case CC_OP_SHLB ... CC_OP_SHLQ:
7196         case CC_OP_SARB ... CC_OP_SARQ:
7197         case CC_OP_BMILGB ... CC_OP_BMILGQ:
7198             /* Z was going to be computed from the non-zero status of CC_DST.
7199                We can get that same Z value (and the new C value) by leaving
7200                CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
7201                same width.  */
7202             tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
7203             set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
7204             break;
7205         default:
7206             /* Otherwise, generate EFLAGS and replace the C bit.  */
7207             gen_compute_eflags(s);
7208             tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
7209                                ctz32(CC_C), 1);
7210             break;
7211         }
7212         break;
7213     case 0x1bc: /* bsf / tzcnt */
7214     case 0x1bd: /* bsr / lzcnt */
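             /*
              * With an F3 prefix and the matching CPUID feature (BMI1 for
              * tzcnt, ABM for lzcnt) these opcodes decode as tzcnt/lzcnt;
              * otherwise the prefix is ignored and plain bsf/bsr semantics
              * apply.  For a nonzero 32-bit input, bsr yields 31 - clz32(x).
              */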
7215         ot = dflag;
7216         modrm = x86_ldub_code(env, s);
7217         reg = ((modrm >> 3) & 7) | REX_R(s);
7218         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
7219         gen_extu(ot, s->T0);
7220 
7221         /* Note that lzcnt and tzcnt are in different extensions.  */
7222         if ((prefixes & PREFIX_REPZ)
7223             && (b & 1
7224                 ? s->cpuid_ext3_features & CPUID_EXT3_ABM
7225                 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
7226             int size = 8 << ot;
7227             /* For lzcnt/tzcnt, the C flag is defined by the input (set iff zero). */
7228             tcg_gen_mov_tl(cpu_cc_src, s->T0);
7229             if (b & 1) {
7230                 /* For lzcnt, reduce the target_ulong result by the
7231                    number of zeros that we expect to find at the top.  */
7232                 tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
7233                 tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
7234             } else {
7235                 /* For tzcnt, a zero input must return the operand size.  */
7236                 tcg_gen_ctzi_tl(s->T0, s->T0, size);
7237             }
7238             /* For lzcnt/tzcnt, the Z flag is defined by the result (set iff zero).  */
7239             gen_op_update1_cc(s);
7240             set_cc_op(s, CC_OP_BMILGB + ot);
7241         } else {
7242             /* For bsr/bsf, only the Z flag is defined, and it reflects
7243                the input, not the result.  */
7244             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
7245             set_cc_op(s, CC_OP_LOGICB + ot);
7246 
7247             /* ??? The manual says that the output is undefined when the
7248                input is zero, but real hardware leaves it unchanged, and
7249                real programs appear to depend on that.  Accomplish this
7250                by passing the output as the value to return upon zero.  */
7251             if (b & 1) {
7252                 /* For bsr, return the bit index of the most significant
7253                    1 bit, not the count of leading zeros.  */
7254                 tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
7255                 tcg_gen_clz_tl(s->T0, s->T0, s->T1);
7256                 tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
7257             } else {
7258                 tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
7259             }
7260         }
7261         gen_op_mov_reg_v(s, ot, reg, s->T0);
7262         break;
7263         /************************/
7264         /* bcd */
7265     case 0x27: /* daa */
7266         if (CODE64(s))
7267             goto illegal_op;
7268         gen_update_cc_op(s);
7269         gen_helper_daa(cpu_env);
7270         set_cc_op(s, CC_OP_EFLAGS);
7271         break;
7272     case 0x2f: /* das */
7273         if (CODE64(s))
7274             goto illegal_op;
7275         gen_update_cc_op(s);
7276         gen_helper_das(cpu_env);
7277         set_cc_op(s, CC_OP_EFLAGS);
7278         break;
7279     case 0x37: /* aaa */
7280         if (CODE64(s))
7281             goto illegal_op;
7282         gen_update_cc_op(s);
7283         gen_helper_aaa(cpu_env);
7284         set_cc_op(s, CC_OP_EFLAGS);
7285         break;
7286     case 0x3f: /* aas */
7287         if (CODE64(s))
7288             goto illegal_op;
7289         gen_update_cc_op(s);
7290         gen_helper_aas(cpu_env);
7291         set_cc_op(s, CC_OP_EFLAGS);
7292         break;
7293     case 0xd4: /* aam */
7294         if (CODE64(s))
7295             goto illegal_op;
7296         val = x86_ldub_code(env, s);
7297         if (val == 0) {
7298             gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
7299         } else {
7300             gen_helper_aam(cpu_env, tcg_const_i32(val));
7301             set_cc_op(s, CC_OP_LOGICB);
7302         }
7303         break;
7304     case 0xd5: /* aad */
7305         if (CODE64(s))
7306             goto illegal_op;
7307         val = x86_ldub_code(env, s);
7308         gen_helper_aad(cpu_env, tcg_const_i32(val));
7309         set_cc_op(s, CC_OP_LOGICB);
7310         break;
7311         /************************/
7312         /* misc */
7313     case 0x90: /* nop */
7314         /* XXX: correct lock test for all insn */
7315         if (prefixes & PREFIX_LOCK) {
7316             goto illegal_op;
7317         }
7318         /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
7319         if (REX_B(s)) {
7320             goto do_xchg_reg_eax;
7321         }
7322         if (prefixes & PREFIX_REPZ) {
7323             gen_update_cc_op(s);
7324             gen_jmp_im(s, pc_start - s->cs_base);
7325             gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
7326             s->base.is_jmp = DISAS_NORETURN;
7327         }
7328         break;
7329     case 0x9b: /* fwait */
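        /* fwait raises #NM only when both CR0.MP and CR0.TS are set
           (mirrored in the hflags); otherwise the helper checks for
           pending FPU exceptions. */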
7330         if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
7331             (HF_MP_MASK | HF_TS_MASK)) {
7332             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7333         } else {
7334             gen_helper_fwait(cpu_env);
7335         }
7336         break;
7337     case 0xcc: /* int3 */
7338         gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
7339         break;
7340     case 0xcd: /* int N */
7341         val = x86_ldub_code(env, s);
7342         if (check_vm86_iopl(s)) {
7343             gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
7344         }
7345         break;
7346     case 0xce: /* into */
7347         if (CODE64(s))
7348             goto illegal_op;
7349         gen_update_cc_op(s);
7350         gen_jmp_im(s, pc_start - s->cs_base);
7351         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7352         break;
7353 #ifdef WANT_ICEBP
7354     case 0xf1: /* icebp (undocumented, exits to external debugger) */
7355         gen_svm_check_intercept(s, SVM_EXIT_ICEBP);
7356         gen_debug(s);
7357         break;
7358 #endif
7359     case 0xfa: /* cli */
7360         if (check_iopl(s)) {
7361             gen_helper_cli(cpu_env);
7362         }
7363         break;
7364     case 0xfb: /* sti */
7365         if (check_iopl(s)) {
7366             gen_helper_sti(cpu_env);
7367             /* interrupts are recognized only after the insn following sti */
7368             gen_jmp_im(s, s->pc - s->cs_base);
7369             gen_eob_inhibit_irq(s, true);
7370         }
7371         break;
7372     case 0x62: /* bound */
7373         if (CODE64(s))
7374             goto illegal_op;
7375         ot = dflag;
7376         modrm = x86_ldub_code(env, s);
7377         reg = (modrm >> 3) & 7;
7378         mod = (modrm >> 6) & 3;
7379         if (mod == 3)
7380             goto illegal_op;
7381         gen_op_mov_v_reg(s, ot, s->T0, reg);
7382         gen_lea_modrm(env, s, modrm);
7383         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7384         if (ot == MO_16) {
7385             gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
7386         } else {
7387             gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
7388         }
7389         break;
7390     case 0x1c8 ... 0x1cf: /* bswap reg */
7391         reg = (b & 7) | REX_B(s);
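        /* With REX.W the full 64-bit register is swapped; otherwise only
           the low 32 bits are swapped and the rest zeroed (TCG_BSWAP_OZ). */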
7392 #ifdef TARGET_X86_64
7393         if (dflag == MO_64) {
7394             tcg_gen_bswap64_i64(cpu_regs[reg], cpu_regs[reg]);
7395             break;
7396         }
7397 #endif
7398         tcg_gen_bswap32_tl(cpu_regs[reg], cpu_regs[reg], TCG_BSWAP_OZ);
7399         break;
7400     case 0xd6: /* salc */
7401         if (CODE64(s))
7402             goto illegal_op;
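        /* Undocumented: AL = CF ? 0xff : 0x00.  Compute CF into T0 (0/1)
           and negate it to form the byte mask. */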
7403         gen_compute_eflags_c(s, s->T0);
7404         tcg_gen_neg_tl(s->T0, s->T0);
7405         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
7406         break;
7407     case 0xe0: /* loopnz */
7408     case 0xe1: /* loopz */
7409     case 0xe2: /* loop */
7410     case 0xe3: /* jecxz */
7411         {
7412             TCGLabel *l1, *l2, *l3;
7413 
7414             tval = (int8_t)insn_get(env, s, MO_8);
7415             next_eip = s->pc - s->cs_base;
7416             tval += next_eip;
7417             if (dflag == MO_16) {
7418                 tval &= 0xffff;
7419             }
7420 
7421             l1 = gen_new_label();
7422             l2 = gen_new_label();
7423             l3 = gen_new_label();
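            /* l1: branch taken, l3: fall through to the next insn,
               l2: common exit after updating EIP. */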
7424             gen_update_cc_op(s);
7425             b &= 3;
7426             switch(b) {
7427             case 0: /* loopnz */
7428             case 1: /* loopz */
7429                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7430                 gen_op_jz_ecx(s, s->aflag, l3);
7431                 gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7432                 break;
7433             case 2: /* loop */
7434                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7435                 gen_op_jnz_ecx(s, s->aflag, l1);
7436                 break;
7437             default:
7438             case 3: /* jcxz */
7439                 gen_op_jz_ecx(s, s->aflag, l1);
7440                 break;
7441             }
7442 
7443             gen_set_label(l3);
7444             gen_jmp_im(s, next_eip);
7445             tcg_gen_br(l2);
7446 
7447             gen_set_label(l1);
7448             gen_jmp_im(s, tval);
7449             gen_set_label(l2);
7450             gen_eob(s);
7451         }
7452         break;
7453     case 0x130: /* wrmsr */
7454     case 0x132: /* rdmsr */
7455         if (check_cpl0(s)) {
7456             gen_update_cc_op(s);
7457             gen_jmp_im(s, pc_start - s->cs_base);
7458             if (b & 2) {
7459                 gen_helper_rdmsr(cpu_env);
7460             } else {
7461                 gen_helper_wrmsr(cpu_env);
7462                 gen_jmp_im(s, s->pc - s->cs_base);
7463                 gen_eob(s);
7464             }
7465         }
7466         break;
7467     case 0x131: /* rdtsc */
7468         gen_update_cc_op(s);
7469         gen_jmp_im(s, pc_start - s->cs_base);
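        /* With icount enabled, the timer read must sit at the end of the
           TB: open an I/O window and jump out right after the helper. */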
7470         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7471             gen_io_start();
7472         }
7473         gen_helper_rdtsc(cpu_env);
7474         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7475             gen_jmp(s, s->pc - s->cs_base);
7476         }
7477         break;
7478     case 0x133: /* rdpmc */
7479         gen_update_cc_op(s);
7480         gen_jmp_im(s, pc_start - s->cs_base);
7481         gen_helper_rdpmc(cpu_env);
7482         s->base.is_jmp = DISAS_NORETURN;
7483         break;
7484     case 0x134: /* sysenter */
7485         /* SYSENTER is valid in 64-bit mode only on Intel CPUs */
7486         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7487             goto illegal_op;
7488         if (!PE(s)) {
7489             gen_exception_gpf(s);
7490         } else {
7491             gen_helper_sysenter(cpu_env);
7492             gen_eob(s);
7493         }
7494         break;
7495     case 0x135: /* sysexit */
7496         /* SYSEXIT is valid in 64-bit mode only on Intel CPUs */
7497         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7498             goto illegal_op;
7499         if (!PE(s)) {
7500             gen_exception_gpf(s);
7501         } else {
7502             gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7503             gen_eob(s);
7504         }
7505         break;
7506 #ifdef TARGET_X86_64
7507     case 0x105: /* syscall */
7508         /* XXX: is it usable in real mode? */
7509         gen_update_cc_op(s);
7510         gen_jmp_im(s, pc_start - s->cs_base);
7511         gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7512         /* TF handling for the syscall insn is different.  The TF bit is
7513            checked after the syscall insn completes.  This allows #DB not
7514            to be generated after one has entered CPL0 if TF is set in FMASK.  */
7515         gen_eob_worker(s, false, true);
7516         break;
7517     case 0x107: /* sysret */
7518         if (!PE(s)) {
7519             gen_exception_gpf(s);
7520         } else {
7521             gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7522             /* condition codes are modified only in long mode */
7523             if (LMA(s)) {
7524                 set_cc_op(s, CC_OP_EFLAGS);
7525             }
7526             /* TF handling for the sysret insn is different. The TF bit is
7527                checked after the sysret insn completes. This allows #DB to be
7528                generated "as if" the syscall insn in userspace had just
7529                completed.  */
7530             gen_eob_worker(s, false, true);
7531         }
7532         break;
7533 #endif
7534     case 0x1a2: /* cpuid */
7535         gen_update_cc_op(s);
7536         gen_jmp_im(s, pc_start - s->cs_base);
7537         gen_helper_cpuid(cpu_env);
7538         break;
7539     case 0xf4: /* hlt */
7540         if (check_cpl0(s)) {
7541             gen_update_cc_op(s);
7542             gen_jmp_im(s, pc_start - s->cs_base);
7543             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7544             s->base.is_jmp = DISAS_NORETURN;
7545         }
7546         break;
7547     case 0x100:
7548         modrm = x86_ldub_code(env, s);
7549         mod = (modrm >> 6) & 3;
7550         op = (modrm >> 3) & 7;
7551         switch(op) {
7552         case 0: /* sldt */
7553             if (!PE(s) || VM86(s))
7554                 goto illegal_op;
7555             if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7556                 break;
7557             }
7558             gen_svm_check_intercept(s, SVM_EXIT_LDTR_READ);
7559             tcg_gen_ld32u_tl(s->T0, cpu_env,
7560                              offsetof(CPUX86State, ldt.selector));
7561             ot = mod == 3 ? dflag : MO_16;
7562             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7563             break;
7564         case 2: /* lldt */
7565             if (!PE(s) || VM86(s))
7566                 goto illegal_op;
7567             if (check_cpl0(s)) {
7568                 gen_svm_check_intercept(s, SVM_EXIT_LDTR_WRITE);
7569                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7570                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7571                 gen_helper_lldt(cpu_env, s->tmp2_i32);
7572             }
7573             break;
7574         case 1: /* str */
7575             if (!PE(s) || VM86(s))
7576                 goto illegal_op;
7577             if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7578                 break;
7579             }
7580             gen_svm_check_intercept(s, SVM_EXIT_TR_READ);
7581             tcg_gen_ld32u_tl(s->T0, cpu_env,
7582                              offsetof(CPUX86State, tr.selector));
7583             ot = mod == 3 ? dflag : MO_16;
7584             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7585             break;
7586         case 3: /* ltr */
7587             if (!PE(s) || VM86(s))
7588                 goto illegal_op;
7589             if (check_cpl0(s)) {
7590                 gen_svm_check_intercept(s, SVM_EXIT_TR_WRITE);
7591                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7592                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7593                 gen_helper_ltr(cpu_env, s->tmp2_i32);
7594             }
7595             break;
7596         case 4: /* verr */
7597         case 5: /* verw */
7598             if (!PE(s) || VM86(s))
7599                 goto illegal_op;
7600             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7601             gen_update_cc_op(s);
7602             if (op == 4) {
7603                 gen_helper_verr(cpu_env, s->T0);
7604             } else {
7605                 gen_helper_verw(cpu_env, s->T0);
7606             }
7607             set_cc_op(s, CC_OP_EFLAGS);
7608             break;
7609         default:
7610             goto unknown_op;
7611         }
7612         break;
7613 
7614     case 0x101:
7615         modrm = x86_ldub_code(env, s);
7616         switch (modrm) {
7617         CASE_MODRM_MEM_OP(0): /* sgdt */
7618             if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7619                 break;
7620             }
7621             gen_svm_check_intercept(s, SVM_EXIT_GDTR_READ);
7622             gen_lea_modrm(env, s, modrm);
7623             tcg_gen_ld32u_tl(s->T0,
7624                              cpu_env, offsetof(CPUX86State, gdt.limit));
7625             gen_op_st_v(s, MO_16, s->T0, s->A0);
7626             gen_add_A0_im(s, 2);
7627             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7628             if (dflag == MO_16) {
7629                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7630             }
7631             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7632             break;
7633 
7634         case 0xc8: /* monitor */
7635             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7636                 goto illegal_op;
7637             }
7638             gen_update_cc_op(s);
7639             gen_jmp_im(s, pc_start - s->cs_base);
7640             tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7641             gen_extu(s->aflag, s->A0);
7642             gen_add_A0_ds_seg(s);
7643             gen_helper_monitor(cpu_env, s->A0);
7644             break;
7645 
7646         case 0xc9: /* mwait */
7647             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7648                 goto illegal_op;
7649             }
7650             gen_update_cc_op(s);
7651             gen_jmp_im(s, pc_start - s->cs_base);
7652             gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7653             s->base.is_jmp = DISAS_NORETURN;
7654             break;
7655 
7656         case 0xca: /* clac */
7657             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7658                 || CPL(s) != 0) {
7659                 goto illegal_op;
7660             }
7661             gen_helper_clac(cpu_env);
7662             gen_jmp_im(s, s->pc - s->cs_base);
7663             gen_eob(s);
7664             break;
7665 
7666         case 0xcb: /* stac */
7667             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7668                 || CPL(s) != 0) {
7669                 goto illegal_op;
7670             }
7671             gen_helper_stac(cpu_env);
7672             gen_jmp_im(s, s->pc - s->cs_base);
7673             gen_eob(s);
7674             break;
7675 
7676         CASE_MODRM_MEM_OP(1): /* sidt */
7677             if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7678                 break;
7679             }
7680             gen_svm_check_intercept(s, SVM_EXIT_IDTR_READ);
7681             gen_lea_modrm(env, s, modrm);
7682             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
7683             gen_op_st_v(s, MO_16, s->T0, s->A0);
7684             gen_add_A0_im(s, 2);
7685             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7686             if (dflag == MO_16) {
7687                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7688             }
7689             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7690             break;
7691 
7692         case 0xd0: /* xgetbv */
7693             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7694                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7695                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7696                 goto illegal_op;
7697             }
7698             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7699             gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
7700             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7701             break;
7702 
7703         case 0xd1: /* xsetbv */
7704             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7705                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7706                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7707                 goto illegal_op;
7708             }
7709             if (!check_cpl0(s)) {
7710                 break;
7711             }
7712             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7713                                   cpu_regs[R_EDX]);
7714             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7715             gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
7716             /* End TB because translation flags may change.  */
7717             gen_jmp_im(s, s->pc - s->cs_base);
7718             gen_eob(s);
7719             break;
7720 
7721         case 0xd8: /* VMRUN */
7722             if (!SVME(s) || !PE(s)) {
7723                 goto illegal_op;
7724             }
7725             if (!check_cpl0(s)) {
7726                 break;
7727             }
7728             gen_update_cc_op(s);
7729             gen_jmp_im(s, pc_start - s->cs_base);
7730             gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7731                              tcg_const_i32(s->pc - pc_start));
7732             tcg_gen_exit_tb(NULL, 0);
7733             s->base.is_jmp = DISAS_NORETURN;
7734             break;
7735 
7736         case 0xd9: /* VMMCALL */
7737             if (!SVME(s)) {
7738                 goto illegal_op;
7739             }
7740             gen_update_cc_op(s);
7741             gen_jmp_im(s, pc_start - s->cs_base);
7742             gen_helper_vmmcall(cpu_env);
7743             break;
7744 
7745         case 0xda: /* VMLOAD */
7746             if (!SVME(s) || !PE(s)) {
7747                 goto illegal_op;
7748             }
7749             if (!check_cpl0(s)) {
7750                 break;
7751             }
7752             gen_update_cc_op(s);
7753             gen_jmp_im(s, pc_start - s->cs_base);
7754             gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7755             break;
7756 
7757         case 0xdb: /* VMSAVE */
7758             if (!SVME(s) || !PE(s)) {
7759                 goto illegal_op;
7760             }
7761             if (!check_cpl0(s)) {
7762                 break;
7763             }
7764             gen_update_cc_op(s);
7765             gen_jmp_im(s, pc_start - s->cs_base);
7766             gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7767             break;
7768 
7769         case 0xdc: /* STGI */
7770             if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7771                 || !PE(s)) {
7772                 goto illegal_op;
7773             }
7774             if (!check_cpl0(s)) {
7775                 break;
7776             }
7777             gen_update_cc_op(s);
7778             gen_helper_stgi(cpu_env);
7779             gen_jmp_im(s, s->pc - s->cs_base);
7780             gen_eob(s);
7781             break;
7782 
7783         case 0xdd: /* CLGI */
7784             if (!SVME(s) || !PE(s)) {
7785                 goto illegal_op;
7786             }
7787             if (!check_cpl0(s)) {
7788                 break;
7789             }
7790             gen_update_cc_op(s);
7791             gen_jmp_im(s, pc_start - s->cs_base);
7792             gen_helper_clgi(cpu_env);
7793             break;
7794 
7795         case 0xde: /* SKINIT */
7796             if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7797                 || !PE(s)) {
7798                 goto illegal_op;
7799             }
7800             gen_svm_check_intercept(s, SVM_EXIT_SKINIT);
7801             /* If not intercepted, not implemented -- raise #UD. */
7802             goto illegal_op;
7803 
7804         case 0xdf: /* INVLPGA */
7805             if (!SVME(s) || !PE(s)) {
7806                 goto illegal_op;
7807             }
7808             if (!check_cpl0(s)) {
7809                 break;
7810             }
7811             gen_svm_check_intercept(s, SVM_EXIT_INVLPGA);
7812             if (s->aflag == MO_64) {
7813                 tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7814             } else {
7815                 tcg_gen_ext32u_tl(s->A0, cpu_regs[R_EAX]);
7816             }
7817             gen_helper_flush_page(cpu_env, s->A0);
7818             gen_jmp_im(s, s->pc - s->cs_base);
7819             gen_eob(s);
7820             break;
7821 
7822         CASE_MODRM_MEM_OP(2): /* lgdt */
7823             if (!check_cpl0(s)) {
7824                 break;
7825             }
7826             gen_svm_check_intercept(s, SVM_EXIT_GDTR_WRITE);
7827             gen_lea_modrm(env, s, modrm);
7828             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7829             gen_add_A0_im(s, 2);
7830             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7831             if (dflag == MO_16) {
7832                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7833             }
7834             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7835             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7836             break;
7837 
7838         CASE_MODRM_MEM_OP(3): /* lidt */
7839             if (!check_cpl0(s)) {
7840                 break;
7841             }
7842             gen_svm_check_intercept(s, SVM_EXIT_IDTR_WRITE);
7843             gen_lea_modrm(env, s, modrm);
7844             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7845             gen_add_A0_im(s, 2);
7846             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7847             if (dflag == MO_16) {
7848                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7849             }
7850             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7851             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
7852             break;
7853 
7854         CASE_MODRM_OP(4): /* smsw */
7855             if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7856                 break;
7857             }
7858             gen_svm_check_intercept(s, SVM_EXIT_READ_CR0);
7859             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
7860             /*
7861              * In 32-bit mode, the higher 16 bits of the destination
7862              * register are undefined.  In practice CR0[31:0] is stored
7863              * just like in 64-bit mode.
7864              */
7865             mod = (modrm >> 6) & 3;
7866             ot = (mod != 3 ? MO_16 : s->dflag);
7867             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7868             break;
7869         case 0xee: /* rdpkru */
7870             if (prefixes & PREFIX_LOCK) {
7871                 goto illegal_op;
7872             }
7873             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7874             gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
7875             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7876             break;
7877         case 0xef: /* wrpkru */
7878             if (prefixes & PREFIX_LOCK) {
7879                 goto illegal_op;
7880             }
7881             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7882                                   cpu_regs[R_EDX]);
7883             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7884             gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
7885             break;
7886 
7887         CASE_MODRM_OP(6): /* lmsw */
7888             if (!check_cpl0(s)) {
7889                 break;
7890             }
7891             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
7892             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7893             /*
7894              * Only the 4 lower bits of CR0 are modified.
7895              * PE cannot be set to zero if already set to one.
7896              */
7897             tcg_gen_ld_tl(s->T1, cpu_env, offsetof(CPUX86State, cr[0]));
7898             tcg_gen_andi_tl(s->T0, s->T0, 0xf);
7899             tcg_gen_andi_tl(s->T1, s->T1, ~0xe);
7900             tcg_gen_or_tl(s->T0, s->T0, s->T1);
7901             gen_helper_write_crN(cpu_env, tcg_constant_i32(0), s->T0);
7902             gen_jmp_im(s, s->pc - s->cs_base);
7903             gen_eob(s);
7904             break;
7905 
7906         CASE_MODRM_MEM_OP(7): /* invlpg */
7907             if (!check_cpl0(s)) {
7908                 break;
7909             }
7910             gen_svm_check_intercept(s, SVM_EXIT_INVLPG);
7911             gen_lea_modrm(env, s, modrm);
7912             gen_helper_flush_page(cpu_env, s->A0);
7913             gen_jmp_im(s, s->pc - s->cs_base);
7914             gen_eob(s);
7915             break;
7916 
7917         case 0xf8: /* swapgs */
7918 #ifdef TARGET_X86_64
7919             if (CODE64(s)) {
7920                 if (check_cpl0(s)) {
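                    /* Exchange the current GS base with the kernelgsbase
                       MSR value. */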
7921                     tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
7922                     tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7923                                   offsetof(CPUX86State, kernelgsbase));
7924                     tcg_gen_st_tl(s->T0, cpu_env,
7925                                   offsetof(CPUX86State, kernelgsbase));
7926                 }
7927                 break;
7928             }
7929 #endif
7930             goto illegal_op;
7931 
7932         case 0xf9: /* rdtscp */
7933             if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7934                 goto illegal_op;
7935             }
7936             gen_update_cc_op(s);
7937             gen_jmp_im(s, pc_start - s->cs_base);
7938             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7939                 gen_io_start();
7940             }
7941             gen_helper_rdtscp(cpu_env);
7942             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7943                 gen_jmp(s, s->pc - s->cs_base);
7944             }
7945             break;
7946 
7947         default:
7948             goto unknown_op;
7949         }
7950         break;
7951 
7952     case 0x108: /* invd */
7953     case 0x109: /* wbinvd */
7954         if (check_cpl0(s)) {
7955             gen_svm_check_intercept(s, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7956             /* nothing to do */
7957         }
7958         break;
7959     case 0x63: /* arpl or movsxd (x86_64) */
7960 #ifdef TARGET_X86_64
7961         if (CODE64(s)) {
7962             int d_ot;
7963             /* d_ot is the size of the destination */
7964             d_ot = dflag;
7965 
7966             modrm = x86_ldub_code(env, s);
7967             reg = ((modrm >> 3) & 7) | REX_R(s);
7968             mod = (modrm >> 6) & 3;
7969             rm = (modrm & 7) | REX_B(s);
7970 
7971             if (mod == 3) {
7972                 gen_op_mov_v_reg(s, MO_32, s->T0, rm);
7973                 /* sign extend */
7974                 if (d_ot == MO_64) {
7975                     tcg_gen_ext32s_tl(s->T0, s->T0);
7976                 }
7977                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7978             } else {
7979                 gen_lea_modrm(env, s, modrm);
7980                 gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
7981                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7982             }
7983         } else
7984 #endif
7985         {
7986             TCGLabel *label1;
7987             TCGv t0, t1, t2, a0;
7988 
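            /* arpl: t0 = destination selector, t1 = source selector,
               t2 = ZF value to merge into EFLAGS.  If RPL(t0) < RPL(t1),
               raise t0's RPL to t1's and set ZF; otherwise clear ZF. */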
7989             if (!PE(s) || VM86(s))
7990                 goto illegal_op;
7991             t0 = tcg_temp_local_new();
7992             t1 = tcg_temp_local_new();
7993             t2 = tcg_temp_local_new();
7994             ot = MO_16;
7995             modrm = x86_ldub_code(env, s);
7996             reg = (modrm >> 3) & 7;
7997             mod = (modrm >> 6) & 3;
7998             rm = modrm & 7;
7999             if (mod != 3) {
8000                 gen_lea_modrm(env, s, modrm);
8001                 gen_op_ld_v(s, ot, t0, s->A0);
8002                 a0 = tcg_temp_local_new();
8003                 tcg_gen_mov_tl(a0, s->A0);
8004             } else {
8005                 gen_op_mov_v_reg(s, ot, t0, rm);
8006                 a0 = NULL;
8007             }
8008             gen_op_mov_v_reg(s, ot, t1, reg);
8009             tcg_gen_andi_tl(s->tmp0, t0, 3);
8010             tcg_gen_andi_tl(t1, t1, 3);
8011             tcg_gen_movi_tl(t2, 0);
8012             label1 = gen_new_label();
8013             tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
8014             tcg_gen_andi_tl(t0, t0, ~3);
8015             tcg_gen_or_tl(t0, t0, t1);
8016             tcg_gen_movi_tl(t2, CC_Z);
8017             gen_set_label(label1);
8018             if (mod != 3) {
8019                 gen_op_st_v(s, ot, t0, a0);
8020                 tcg_temp_free(a0);
8021             } else {
8022                 gen_op_mov_reg_v(s, ot, rm, t0);
8023             }
8024             gen_compute_eflags(s);
8025             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
8026             tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
8027             tcg_temp_free(t0);
8028             tcg_temp_free(t1);
8029             tcg_temp_free(t2);
8030         }
8031         break;
8032     case 0x102: /* lar */
8033     case 0x103: /* lsl */
8034         {
8035             TCGLabel *label1;
8036             TCGv t0;
8037             if (!PE(s) || VM86(s))
8038                 goto illegal_op;
8039             ot = dflag != MO_16 ? MO_32 : MO_16;
8040             modrm = x86_ldub_code(env, s);
8041             reg = ((modrm >> 3) & 7) | REX_R(s);
8042             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
8043             t0 = tcg_temp_local_new();
8044             gen_update_cc_op(s);
8045             if (b == 0x102) {
8046                 gen_helper_lar(t0, cpu_env, s->T0);
8047             } else {
8048                 gen_helper_lsl(t0, cpu_env, s->T0);
8049             }
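            /* The helper reports success by setting ZF in cpu_cc_src;
               write the result back only when ZF is set. */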
8050             tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
8051             label1 = gen_new_label();
8052             tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
8053             gen_op_mov_reg_v(s, ot, reg, t0);
8054             gen_set_label(label1);
8055             set_cc_op(s, CC_OP_EFLAGS);
8056             tcg_temp_free(t0);
8057         }
8058         break;
8059     case 0x118:
8060         modrm = x86_ldub_code(env, s);
8061         mod = (modrm >> 6) & 3;
8062         op = (modrm >> 3) & 7;
8063         switch(op) {
8064         case 0: /* prefetchnta */
8065         case 1: /* prefetcht0 */
8066         case 2: /* prefetcht1 */
8067         case 3: /* prefetcht2 */
8068             if (mod == 3)
8069                 goto illegal_op;
8070             gen_nop_modrm(env, s, modrm);
8071             /* nothing more to do */
8072             break;
8073         default: /* nop (multi byte) */
8074             gen_nop_modrm(env, s, modrm);
8075             break;
8076         }
8077         break;
8078     case 0x11a:
8079         modrm = x86_ldub_code(env, s);
8080         if (s->flags & HF_MPX_EN_MASK) {
8081             mod = (modrm >> 6) & 3;
8082             reg = ((modrm >> 3) & 7) | REX_R(s);
8083             if (prefixes & PREFIX_REPZ) {
8084                 /* bndcl */
8085                 if (reg >= 4
8086                     || (prefixes & PREFIX_LOCK)
8087                     || s->aflag == MO_16) {
8088                     goto illegal_op;
8089                 }
8090                 gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
8091             } else if (prefixes & PREFIX_REPNZ) {
8092                 /* bndcu */
8093                 if (reg >= 4
8094                     || (prefixes & PREFIX_LOCK)
8095                     || s->aflag == MO_16) {
8096                     goto illegal_op;
8097                 }
8098                 TCGv_i64 notu = tcg_temp_new_i64();
8099                 tcg_gen_not_i64(notu, cpu_bndu[reg]);
8100                 gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
8101                 tcg_temp_free_i64(notu);
8102             } else if (prefixes & PREFIX_DATA) {
8103                 /* bndmov -- from reg/mem */
8104                 if (reg >= 4 || s->aflag == MO_16) {
8105                     goto illegal_op;
8106                 }
8107                 if (mod == 3) {
8108                     int reg2 = (modrm & 7) | REX_B(s);
8109                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
8110                         goto illegal_op;
8111                     }
8112                     if (s->flags & HF_MPX_IU_MASK) {
8113                         tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
8114                         tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
8115                     }
8116                 } else {
8117                     gen_lea_modrm(env, s, modrm);
8118                     if (CODE64(s)) {
8119                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
8120                                             s->mem_index, MO_LEUQ);
8121                         tcg_gen_addi_tl(s->A0, s->A0, 8);
8122                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
8123                                             s->mem_index, MO_LEUQ);
8124                     } else {
8125                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
8126                                             s->mem_index, MO_LEUL);
8127                         tcg_gen_addi_tl(s->A0, s->A0, 4);
8128                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
8129                                             s->mem_index, MO_LEUL);
8130                     }
8131                     /* bnd registers are now in use */
8132                     gen_set_hflag(s, HF_MPX_IU_MASK);
8133                 }
8134             } else if (mod != 3) {
8135                 /* bndldx */
8136                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
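                /* a.base < -1 denotes a rip-relative address, which
                   raises #UD for bndldx (as it does for bndmk). */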
8137                 if (reg >= 4
8138                     || (prefixes & PREFIX_LOCK)
8139                     || s->aflag == MO_16
8140                     || a.base < -1) {
8141                     goto illegal_op;
8142                 }
8143                 if (a.base >= 0) {
8144                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
8145                 } else {
8146                     tcg_gen_movi_tl(s->A0, 0);
8147                 }
8148                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
8149                 if (a.index >= 0) {
8150                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
8151                 } else {
8152                     tcg_gen_movi_tl(s->T0, 0);
8153                 }
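                /* The 64-bit helper returns the lower bound and leaves the
                   upper bound in mmx_t0; the 32-bit helper packs both
                   32-bit bounds into a single 64-bit return value. */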
8154                 if (CODE64(s)) {
8155                     gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
8156                     tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
8157                                    offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
8158                 } else {
8159                     gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
8160                     tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
8161                     tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
8162                 }
8163                 gen_set_hflag(s, HF_MPX_IU_MASK);
8164             }
8165         }
8166         gen_nop_modrm(env, s, modrm);
8167         break;
8168     case 0x11b:
8169         modrm = x86_ldub_code(env, s);
8170         if (s->flags & HF_MPX_EN_MASK) {
8171             mod = (modrm >> 6) & 3;
8172             reg = ((modrm >> 3) & 7) | REX_R(s);
8173             if (mod != 3 && (prefixes & PREFIX_REPZ)) {
8174                 /* bndmk */
8175                 if (reg >= 4
8176                     || (prefixes & PREFIX_LOCK)
8177                     || s->aflag == MO_16) {
8178                     goto illegal_op;
8179                 }
8180                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
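                /* bndmk: the lower bound is the base register value; the
                   upper bound is stored one's-complemented, hence the
                   tcg_gen_not_tl on the computed effective address below. */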
8181                 if (a.base >= 0) {
8182                     tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
8183                     if (!CODE64(s)) {
8184                         tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
8185                     }
8186                 } else if (a.base == -1) {
8187                     /* no base register: the lower bound is 0 */
8188                     tcg_gen_movi_i64(cpu_bndl[reg], 0);
8189                 } else {
8190                     /* rip-relative generates #ud */
8191                     goto illegal_op;
8192                 }
8193                 tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
8194                 if (!CODE64(s)) {
8195                     tcg_gen_ext32u_tl(s->A0, s->A0);
8196                 }
8197                 tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
8198                 /* bnd registers are now in use */
8199                 gen_set_hflag(s, HF_MPX_IU_MASK);
8200                 break;
8201             } else if (prefixes & PREFIX_REPNZ) {
8202                 /* bndcn */
8203                 if (reg >= 4
8204                     || (prefixes & PREFIX_LOCK)
8205                     || s->aflag == MO_16) {
8206                     goto illegal_op;
8207                 }
8208                 gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
8209             } else if (prefixes & PREFIX_DATA) {
8210                 /* bndmov -- to reg/mem */
8211                 if (reg >= 4 || s->aflag == MO_16) {
8212                     goto illegal_op;
8213                 }
8214                 if (mod == 3) {
8215                     int reg2 = (modrm & 7) | REX_B(s);
8216                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
8217                         goto illegal_op;
8218                     }
8219                     if (s->flags & HF_MPX_IU_MASK) {
8220                         tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
8221                         tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
8222                     }
8223                 } else {
8224                     gen_lea_modrm(env, s, modrm);
8225                     if (CODE64(s)) {
8226                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8227                                             s->mem_index, MO_LEUQ);
8228                         tcg_gen_addi_tl(s->A0, s->A0, 8);
8229                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8230                                             s->mem_index, MO_LEUQ);
8231                     } else {
8232                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8233                                             s->mem_index, MO_LEUL);
8234                         tcg_gen_addi_tl(s->A0, s->A0, 4);
8235                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8236                                             s->mem_index, MO_LEUL);
8237                     }
8238                 }
8239             } else if (mod != 3) {
8240                 /* bndstx */
8241                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
8242                 if (reg >= 4
8243                     || (prefixes & PREFIX_LOCK)
8244                     || s->aflag == MO_16
8245                     || a.base < -1) {
8246                     goto illegal_op;
8247                 }
8248                 if (a.base >= 0) {
8249                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
8250                 } else {
8251                     tcg_gen_movi_tl(s->A0, 0);
8252                 }
8253                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
8254                 if (a.index >= 0) {
8255                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
8256                 } else {
8257                     tcg_gen_movi_tl(s->T0, 0);
8258                 }
8259                 if (CODE64(s)) {
8260                     gen_helper_bndstx64(cpu_env, s->A0, s->T0,
8261                                         cpu_bndl[reg], cpu_bndu[reg]);
8262                 } else {
8263                     gen_helper_bndstx32(cpu_env, s->A0, s->T0,
8264                                         cpu_bndl[reg], cpu_bndu[reg]);
8265                 }
8266             }
8267         }
8268         gen_nop_modrm(env, s, modrm);
8269         break;
8270     case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
8271         modrm = x86_ldub_code(env, s);
8272         gen_nop_modrm(env, s, modrm);
8273         break;
8274 
8275     case 0x120: /* mov reg, crN */
8276     case 0x122: /* mov crN, reg */
8277         if (!check_cpl0(s)) {
8278             break;
8279         }
8280         modrm = x86_ldub_code(env, s);
8281         /*
8282          * Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8283          * AMD documentation (24594.pdf) and testing of Intel 386 and 486
8284          * processors all show that the mod bits are assumed to be 1's,
8285          * regardless of actual values.
8286          */
8287         rm = (modrm & 7) | REX_B(s);
8288         reg = ((modrm >> 3) & 7) | REX_R(s);
8289         switch (reg) {
8290         case 0:
8291             if ((prefixes & PREFIX_LOCK) &&
8292                 (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
8293                 reg = 8;
8294             }
8295             break;
8296         case 2:
8297         case 3:
8298         case 4:
8299         case 8:
8300             break;
8301         default:
8302             goto unknown_op;
8303         }
8304         ot  = (CODE64(s) ? MO_64 : MO_32);
8305 
8306         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8307             gen_io_start();
8308         }
8309         if (b & 2) {
8310             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0 + reg);
8311             gen_op_mov_v_reg(s, ot, s->T0, rm);
8312             gen_helper_write_crN(cpu_env, tcg_constant_i32(reg), s->T0);
8313             gen_jmp_im(s, s->pc - s->cs_base);
8314             gen_eob(s);
8315         } else {
8316             gen_svm_check_intercept(s, SVM_EXIT_READ_CR0 + reg);
8317             gen_helper_read_crN(s->T0, cpu_env, tcg_constant_i32(reg));
8318             gen_op_mov_reg_v(s, ot, rm, s->T0);
8319             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8320                 gen_jmp(s, s->pc - s->cs_base);
8321             }
8322         }
8323         break;
8324 
8325     case 0x121: /* mov reg, drN */
8326     case 0x123: /* mov drN, reg */
8327         if (check_cpl0(s)) {
8328             modrm = x86_ldub_code(env, s);
8329             /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8330              * AMD documentation (24594.pdf) and testing of
8331              * Intel 386 and 486 processors all show that the mod bits
8332              * are assumed to be 1's, regardless of actual values.
8333              */
8334             rm = (modrm & 7) | REX_B(s);
8335             reg = ((modrm >> 3) & 7) | REX_R(s);
8336             if (CODE64(s))
8337                 ot = MO_64;
8338             else
8339                 ot = MO_32;
8340             if (reg >= 8) {
8341                 goto illegal_op;
8342             }
8343             if (b & 2) {
8344                 gen_svm_check_intercept(s, SVM_EXIT_WRITE_DR0 + reg);
8345                 gen_op_mov_v_reg(s, ot, s->T0, rm);
8346                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8347                 gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
8348                 gen_jmp_im(s, s->pc - s->cs_base);
8349                 gen_eob(s);
8350             } else {
8351                 gen_svm_check_intercept(s, SVM_EXIT_READ_DR0 + reg);
8352                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8353                 gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
8354                 gen_op_mov_reg_v(s, ot, rm, s->T0);
8355             }
8356         }
8357         break;
8358     case 0x106: /* clts */
8359         if (check_cpl0(s)) {
8360             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
8361             gen_helper_clts(cpu_env);
8362             /* abort block because static cpu state changed */
8363             gen_jmp_im(s, s->pc - s->cs_base);
8364             gen_eob(s);
8365         }
8366         break;
8367     /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8368     case 0x1c3: /* MOVNTI reg, mem */
8369         if (!(s->cpuid_features & CPUID_SSE2))
8370             goto illegal_op;
8371         ot = mo_64_32(dflag);
8372         modrm = x86_ldub_code(env, s);
8373         mod = (modrm >> 6) & 3;
8374         if (mod == 3)
8375             goto illegal_op;
8376         reg = ((modrm >> 3) & 7) | REX_R(s);
8377         /* generate a generic store */
8378         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8379         break;
8380     case 0x1ae:
8381         modrm = x86_ldub_code(env, s);
8382         switch (modrm) {
8383         CASE_MODRM_MEM_OP(0): /* fxsave */
8384             if (!(s->cpuid_features & CPUID_FXSR)
8385                 || (prefixes & PREFIX_LOCK)) {
8386                 goto illegal_op;
8387             }
8388             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8389                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8390                 break;
8391             }
8392             gen_lea_modrm(env, s, modrm);
8393             gen_helper_fxsave(cpu_env, s->A0);
8394             break;
8395 
8396         CASE_MODRM_MEM_OP(1): /* fxrstor */
8397             if (!(s->cpuid_features & CPUID_FXSR)
8398                 || (prefixes & PREFIX_LOCK)) {
8399                 goto illegal_op;
8400             }
8401             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8402                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8403                 break;
8404             }
8405             gen_lea_modrm(env, s, modrm);
8406             gen_helper_fxrstor(cpu_env, s->A0);
8407             break;
8408 
8409         CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8410             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8411                 goto illegal_op;
8412             }
8413             if (s->flags & HF_TS_MASK) {
8414                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8415                 break;
8416             }
8417             gen_lea_modrm(env, s, modrm);
8418             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8419             gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8420             break;
8421 
8422         CASE_MODRM_MEM_OP(3): /* stmxcsr */
8423             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8424                 goto illegal_op;
8425             }
8426             if (s->flags & HF_TS_MASK) {
8427                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8428                 break;
8429             }
8430             gen_helper_update_mxcsr(cpu_env);
8431             gen_lea_modrm(env, s, modrm);
8432             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8433             gen_op_st_v(s, MO_32, s->T0, s->A0);
8434             break;
8435 
8436         CASE_MODRM_MEM_OP(4): /* xsave */
8437             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8438                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8439                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8440                 goto illegal_op;
8441             }
8442             gen_lea_modrm(env, s, modrm);
8443             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8444                                   cpu_regs[R_EDX]);
8445             gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8446             break;
8447 
8448         CASE_MODRM_MEM_OP(5): /* xrstor */
8449             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8450                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8451                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8452                 goto illegal_op;
8453             }
8454             gen_lea_modrm(env, s, modrm);
8455             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8456                                   cpu_regs[R_EDX]);
8457             gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8458             /* XRSTOR is how MPX is enabled, which changes how
8459                we translate.  Thus we need to end the TB.  */
8460             gen_update_cc_op(s);
8461             gen_jmp_im(s, s->pc - s->cs_base);
8462             gen_eob(s);
8463             break;
8464 
8465         CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8466             if (prefixes & PREFIX_LOCK) {
8467                 goto illegal_op;
8468             }
8469             if (prefixes & PREFIX_DATA) {
8470                 /* clwb */
8471                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8472                     goto illegal_op;
8473                 }
8474                 gen_nop_modrm(env, s, modrm);
8475             } else {
8476                 /* xsaveopt */
8477                 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8478                     || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8479                     || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8480                     goto illegal_op;
8481                 }
8482                 gen_lea_modrm(env, s, modrm);
8483                 tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8484                                       cpu_regs[R_EDX]);
8485                 gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8486             }
8487             break;
8488 
8489         CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8490             if (prefixes & PREFIX_LOCK) {
8491                 goto illegal_op;
8492             }
8493             if (prefixes & PREFIX_DATA) {
8494                 /* clflushopt */
8495                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8496                     goto illegal_op;
8497                 }
8498             } else {
8499                 /* clflush */
8500                 if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8501                     || !(s->cpuid_features & CPUID_CLFLUSH)) {
8502                     goto illegal_op;
8503                 }
8504             }
8505             gen_nop_modrm(env, s, modrm);
8506             break;
8507 
8508         case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8509         case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8510         case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8511         case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8512             if (CODE64(s)
8513                 && (prefixes & PREFIX_REPZ)
8514                 && !(prefixes & PREFIX_LOCK)
8515                 && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8516                 TCGv base, treg, src, dst;
8517 
8518                 /* Preserve hflags bits by testing CR4 at runtime.  */
8519                 tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8520                 gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
8521 
8522                 base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8523                 treg = cpu_regs[(modrm & 7) | REX_B(s)];
8524 
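                /* modrm bit 3 selects FS vs GS, bit 4 read vs write; the
                   CR4.FSGSBASE test above #UDs if the feature is disabled. */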
8525                 if (modrm & 0x10) {
8526                     /* wr*base */
8527                     dst = base, src = treg;
8528                 } else {
8529                     /* rd*base */
8530                     dst = treg, src = base;
8531                 }
8532 
8533                 if (s->dflag == MO_32) {
8534                     tcg_gen_ext32u_tl(dst, src);
8535                 } else {
8536                     tcg_gen_mov_tl(dst, src);
8537                 }
8538                 break;
8539             }
8540             goto unknown_op;
8541 
8542         case 0xf8: /* sfence / pcommit */
8543             if (prefixes & PREFIX_DATA) {
8544                 /* pcommit */
8545                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8546                     || (prefixes & PREFIX_LOCK)) {
8547                     goto illegal_op;
8548                 }
8549                 break;
8550             }
8551             /* fallthru */
8552         case 0xf9 ... 0xff: /* sfence */
8553             if (!(s->cpuid_features & CPUID_SSE)
8554                 || (prefixes & PREFIX_LOCK)) {
8555                 goto illegal_op;
8556             }
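            /* sfence only orders stores, so a store-store barrier is
               enough here; lfence and mfence below use LD_LD and ALL. */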
8557             tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8558             break;
8559         case 0xe8 ... 0xef: /* lfence */
8560             if (!(s->cpuid_features & CPUID_SSE)
8561                 || (prefixes & PREFIX_LOCK)) {
8562                 goto illegal_op;
8563             }
8564             tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8565             break;
8566         case 0xf0 ... 0xf7: /* mfence */
8567             if (!(s->cpuid_features & CPUID_SSE2)
8568                 || (prefixes & PREFIX_LOCK)) {
8569                 goto illegal_op;
8570             }
8571             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8572             break;
8573 
8574         default:
8575             goto unknown_op;
8576         }
8577         break;
8578 
8579     case 0x10d: /* 3DNow! prefetch(w) */
8580         modrm = x86_ldub_code(env, s);
8581         mod = (modrm >> 6) & 3;
8582         if (mod == 3)
8583             goto illegal_op;
8584         gen_nop_modrm(env, s, modrm);
8585         break;
8586     case 0x1aa: /* rsm */
8587         gen_svm_check_intercept(s, SVM_EXIT_RSM);
8588         if (!(s->flags & HF_SMM_MASK))
8589             goto illegal_op;
8590 #ifdef CONFIG_USER_ONLY
8591         /* we should not be in SMM mode */
8592         g_assert_not_reached();
8593 #else
8594         gen_update_cc_op(s);
8595         gen_jmp_im(s, s->pc - s->cs_base);
8596         gen_helper_rsm(cpu_env);
8597 #endif /* CONFIG_USER_ONLY */
8598         gen_eob(s);
8599         break;
8600     case 0x1b8: /* SSE4.2 popcnt */
8601         if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8602              PREFIX_REPZ)
8603             goto illegal_op;
8604         if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8605             goto illegal_op;
8606 
8607         modrm = x86_ldub_code(env, s);
8608         reg = ((modrm >> 3) & 7) | REX_R(s);
8609 
8610         if (s->prefix & PREFIX_DATA) {
8611             ot = MO_16;
8612         } else {
8613             ot = mo_64_32(dflag);
8614         }
8615 
8616         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8617         gen_extu(ot, s->T0);
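        /* CC_OP_POPCNT derives ZF from cc_src (the zero-extended input);
           all other flags read as zero. */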
8618         tcg_gen_mov_tl(cpu_cc_src, s->T0);
8619         tcg_gen_ctpop_tl(s->T0, s->T0);
8620         gen_op_mov_reg_v(s, ot, reg, s->T0);
8621 
8622         set_cc_op(s, CC_OP_POPCNT);
8623         break;
    case 0x10e ... 0x10f:
        /* 3DNow! instructions, ignore prefixes */
        s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
        /* fall through */
    case 0x110 ... 0x117:
    case 0x128 ... 0x12f:
    case 0x138 ... 0x13a:
    case 0x150 ... 0x179:
    case 0x17c ... 0x17f:
    case 0x1c2:
    case 0x1c4 ... 0x1c6:
    case 0x1d0 ... 0x1fe:
        gen_sse(env, s, b, pc_start);
        break;
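    /*
     * Two-byte 0x0f opcodes carry a 0x100 bias in 'b', so the ranges
     * above correspond to 0f 10 ... 0f fe; all of them are MMX/SSE
     * encodings that gen_sse() decodes on its own.
     */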
    default:
        goto unknown_op;
    }
    return s->pc;
 illegal_op:
    /* #UD for an encoding that is architecturally invalid.  */
    gen_illegal_opcode(s);
    return s->pc;
 unknown_op:
    /* Also #UD, but additionally logged as unimplemented (-d unimp).  */
    gen_unknown_opcode(env, s);
    return s->pc;
}

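/*
 * Register the fixed TCG globals declared at the top of this file:
 * each is permanently bound to a field of CPUX86State (the GPRs,
 * segment bases, cc_* values and MPX bound registers).  Expected to
 * run once during CPU setup, before the first TB is translated.
 */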
void tcg_x86_init(void)
{
    static const char reg_names[CPU_NB_REGS][4] = {
#ifdef TARGET_X86_64
        [R_EAX] = "rax",
        [R_EBX] = "rbx",
        [R_ECX] = "rcx",
        [R_EDX] = "rdx",
        [R_ESI] = "rsi",
        [R_EDI] = "rdi",
        [R_EBP] = "rbp",
        [R_ESP] = "rsp",
        [8]  = "r8",
        [9]  = "r9",
        [10] = "r10",
        [11] = "r11",
        [12] = "r12",
        [13] = "r13",
        [14] = "r14",
        [15] = "r15",
#else
        [R_EAX] = "eax",
        [R_EBX] = "ebx",
        [R_ECX] = "ecx",
        [R_EDX] = "edx",
        [R_ESI] = "esi",
        [R_EDI] = "edi",
        [R_EBP] = "ebp",
        [R_ESP] = "esp",
#endif
    };
    static const char seg_base_names[6][8] = {
        [R_CS] = "cs_base",
        [R_DS] = "ds_base",
        [R_ES] = "es_base",
        [R_FS] = "fs_base",
        [R_GS] = "gs_base",
        [R_SS] = "ss_base",
    };
    static const char bnd_regl_names[4][8] = {
        "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
    };
    static const char bnd_regu_names[4][8] = {
        "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
    };
    int i;

    cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
                                       offsetof(CPUX86State, cc_op), "cc_op");
    cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
                                    "cc_dst");
    cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
                                    "cc_src");
    cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
                                     "cc_src2");

    for (i = 0; i < CPU_NB_REGS; ++i) {
        cpu_regs[i] = tcg_global_mem_new(cpu_env,
                                         offsetof(CPUX86State, regs[i]),
                                         reg_names[i]);
    }

    for (i = 0; i < 6; ++i) {
        cpu_seg_base[i]
            = tcg_global_mem_new(cpu_env,
                                 offsetof(CPUX86State, segs[i].base),
                                 seg_base_names[i]);
    }

    for (i = 0; i < 4; ++i) {
        cpu_bndl[i]
            = tcg_global_mem_new_i64(cpu_env,
                                     offsetof(CPUX86State, bnd_regs[i].lb),
                                     bnd_regl_names[i]);
        cpu_bndu[i]
            = tcg_global_mem_new_i64(cpu_env,
                                     offsetof(CPUX86State, bnd_regs[i].ub),
                                     bnd_regu_names[i]);
    }
}

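/*
 * The i386_tr_* functions below are the TranslatorOps hooks invoked by
 * the generic translator_loop(): init_disas_context once per TB,
 * insn_start/translate_insn once per guest instruction, and tb_stop
 * when the block ends.
 */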
static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUX86State *env = cpu->env_ptr;
    uint32_t flags = dc->base.tb->flags;
    uint32_t cflags = tb_cflags(dc->base.tb);
    int cpl = (flags >> HF_CPL_SHIFT) & 3;
    int iopl = (flags >> IOPL_SHIFT) & 3;

    dc->cs_base = dc->base.tb->cs_base;
    dc->flags = flags;
#ifndef CONFIG_USER_ONLY
    dc->cpl = cpl;
    dc->iopl = iopl;
#endif

    /* We make some simplifying assumptions; validate they're correct. */
    g_assert(PE(dc) == ((flags & HF_PE_MASK) != 0));
    g_assert(CPL(dc) == cpl);
    g_assert(IOPL(dc) == iopl);
    g_assert(VM86(dc) == ((flags & HF_VM_MASK) != 0));
    g_assert(CODE32(dc) == ((flags & HF_CS32_MASK) != 0));
    g_assert(CODE64(dc) == ((flags & HF_CS64_MASK) != 0));
    g_assert(SS32(dc) == ((flags & HF_SS32_MASK) != 0));
    g_assert(LMA(dc) == ((flags & HF_LMA_MASK) != 0));
    g_assert(ADDSEG(dc) == ((flags & HF_ADDSEG_MASK) != 0));
    g_assert(SVME(dc) == ((flags & HF_SVME_MASK) != 0));
    g_assert(GUEST(dc) == ((flags & HF_GUEST_MASK) != 0));

    dc->cc_op = CC_OP_DYNAMIC;
    dc->cc_op_dirty = false;
    dc->popl_esp_hack = 0;
    /* select memory access functions */
    dc->mem_index = 0;
#ifdef CONFIG_SOFTMMU
    dc->mem_index = cpu_mmu_index(env, false);
#endif
    dc->cpuid_features = env->features[FEAT_1_EDX];
    dc->cpuid_ext_features = env->features[FEAT_1_ECX];
    dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
    dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
    dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
    dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
    dc->jmp_opt = !((cflags & CF_NO_GOTO_TB) ||
                    (flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)));
    /*
     * If jmp_opt, we want to handle each string instruction individually.
     * For icount also disable repz optimization so that each iteration
     * is accounted separately.
     */
    dc->repz_opt = !dc->jmp_opt && !(cflags & CF_USE_ICOUNT);

    dc->T0 = tcg_temp_new();
    dc->T1 = tcg_temp_new();
    dc->A0 = tcg_temp_new();

    dc->tmp0 = tcg_temp_new();
    dc->tmp1_i64 = tcg_temp_new_i64();
    dc->tmp2_i32 = tcg_temp_new_i32();
    dc->tmp3_i32 = tcg_temp_new_i32();
    dc->tmp4 = tcg_temp_new();
    dc->ptr0 = tcg_temp_new_ptr();
    dc->ptr1 = tcg_temp_new_ptr();
    /* cc_srcT must survive across branches, hence a local temp.  */
    dc->cc_srcT = tcg_temp_local_new();
}

static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
    /* Nothing to do: all per-TB setup happens in init_disas_context.  */
}

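/*
 * Mark the start of each guest instruction, recording the (pc, cc_op)
 * pair; these are exactly the values handed back to
 * restore_state_to_opc() as data[0]/data[1] when a fault unwinds into
 * the middle of a TB.
 */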
static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    dc->prev_insn_end = tcg_last_op();
    tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
}

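/*
 * Translate a single guest instruction.  The TB is cut short
 * (DISAS_TOO_MANY) when single-stepping, when IRQs were inhibited by
 * the previous instruction, or when the next instruction would start
 * on another page.
 */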
static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    target_ulong pc_next;

#ifdef TARGET_VSYSCALL_PAGE
    /*
     * Detect entry into the vsyscall page and invoke the syscall.
     */
    if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) {
        gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next);
        dc->base.pc_next = dc->pc + 1;
        return;
    }
#endif

    pc_next = disas_insn(dc, cpu);
    dc->base.pc_next = pc_next;

    if (dc->base.is_jmp == DISAS_NEXT) {
        if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
            /*
             * In single-step mode, we generate only one instruction
             * and then raise an exception.
             * If IRQs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
             * the flag and abort the translation to give the IRQs a
             * chance to happen.
             */
            dc->base.is_jmp = DISAS_TOO_MANY;
        } else if (!is_same_page(&dc->base, pc_next)) {
            dc->base.is_jmp = DISAS_TOO_MANY;
        }
    }
}

static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (dc->base.is_jmp == DISAS_TOO_MANY) {
        gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
        gen_eob(dc);
    }
}

static void i386_tr_disas_log(const DisasContextBase *dcbase,
                              CPUState *cpu, FILE *logfile)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
    target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
}

static const TranslatorOps i386_tr_ops = {
    .init_disas_context = i386_tr_init_disas_context,
    .tb_start           = i386_tr_tb_start,
    .insn_start         = i386_tr_insn_start,
    .translate_insn     = i386_tr_translate_insn,
    .tb_stop            = i386_tr_tb_stop,
    .disas_log          = i386_tr_disas_log,
};

/* generate intermediate code for basic block 'tb'.  */
void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
                           target_ulong pc, void *host_pc)
{
    DisasContext dc;

    translator_loop(cpu, tb, max_insns, pc, host_pc, &i386_tr_ops, &dc.base);
}
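/*
 * translator_loop() drives the hooks in i386_tr_ops: it calls
 * init_disas_context once, then insn_start/translate_insn per
 * instruction until is_jmp leaves DISAS_NEXT or max_insns is reached,
 * and finally tb_stop to close the block.
 */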

void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
                          target_ulong *data)
{
    int cc_op = data[1];
    env->eip = data[0] - tb->cs_base;
    if (cc_op != CC_OP_DYNAMIC) {
        env->cc_op = cc_op;
    }
}
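/*
 * Purely illustrative example: if tcg_gen_insn_start() recorded
 * pc = 0x401005 for a TB with cs_base 0x400000, a fault in that
 * instruction restores env->eip to 0x1005, and cc_op is restored only
 * when it was statically known at that point (not CC_OP_DYNAMIC).
 */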