xref: /qemu/target/i386/tcg/translate.c (revision d051d0e1)
1 /*
2  *  i386 translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 
21 #include "qemu/host-utils.h"
22 #include "cpu.h"
23 #include "disas/disas.h"
24 #include "exec/exec-all.h"
25 #include "tcg/tcg-op.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/translator.h"
28 
29 #include "exec/helper-proto.h"
30 #include "exec/helper-gen.h"
31 #include "helper-tcg.h"
32 
33 #include "exec/log.h"
34 
/* One bit per instruction prefix; tested against DisasContext.prefix. */
#define PREFIX_REPZ   (1 << 0)
#define PREFIX_REPNZ  (1 << 1)
#define PREFIX_LOCK   (1 << 2)
#define PREFIX_DATA   (1 << 3)
#define PREFIX_ADR    (1 << 4)
#define PREFIX_VEX    (1 << 5)
#define PREFIX_REX    (1 << 6)
42 
/* Count-zero helpers: 32-bit variants unless TARGET_X86_64 is defined. */
#ifndef TARGET_X86_64
# define ctztl  ctz32
# define clztl  clz32
#else
# define ctztl  ctz64
# define clztl  clz64
#endif
50 
/*
 * For a switch indexed by MODRM, match all memory operands for a given OP:
 * bits 7..6 equal to 0, 1 or 2, bits 5..3 equal to OP, any value in
 * bits 2..0.  OP is parenthesized so that an expression argument
 * (e.g. "a + b") is shifted as a whole.
 */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | ((OP) << 3) | 0 ... (0 << 6) | ((OP) << 3) | 7: \
    case (1 << 6) | ((OP) << 3) | 0 ... (1 << 6) | ((OP) << 3) | 7: \
    case (2 << 6) | ((OP) << 3) | 0 ... (2 << 6) | ((OP) << 3) | 7

/* As CASE_MODRM_MEM_OP, but additionally match bits 7..6 equal to 3. */
#define CASE_MODRM_OP(OP) \
    case (0 << 6) | ((OP) << 3) | 0 ... (0 << 6) | ((OP) << 3) | 7: \
    case (1 << 6) | ((OP) << 3) | 0 ... (1 << 6) | ((OP) << 3) | 7: \
    case (2 << 6) | ((OP) << 3) | 0 ... (2 << 6) | ((OP) << 3) | 7: \
    case (3 << 6) | ((OP) << 3) | 0 ... (3 << 6) | ((OP) << 3) | 7
62 
63 //#define MACRO_TEST   1
64 
65 /* global register indexes */
66 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
67 static TCGv_i32 cpu_cc_op;
68 static TCGv cpu_regs[CPU_NB_REGS];
69 static TCGv cpu_seg_base[6];
70 static TCGv_i64 cpu_bndl[4];
71 static TCGv_i64 cpu_bndu[4];
72 
73 #include "exec/gen-icount.h"
74 
75 typedef struct DisasContext {
76     DisasContextBase base;
77 
78     target_ulong pc;       /* pc = eip + cs_base */
79     target_ulong pc_start; /* pc at TB entry */
80     target_ulong cs_base;  /* base of CS segment */
81 
82     MemOp aflag;
83     MemOp dflag;
84 
85     int8_t override; /* -1 if no override, else R_CS, R_DS, etc */
86     uint8_t prefix;
87 
88 #ifndef CONFIG_USER_ONLY
89     uint8_t cpl;   /* code priv level */
90     uint8_t iopl;  /* i/o priv level */
91 #endif
92     uint8_t vex_l;  /* vex vector length */
93     uint8_t vex_v;  /* vex vvvv register, without 1's complement.  */
94     uint8_t popl_esp_hack; /* for correct popl with esp base handling */
95     uint8_t rip_offset; /* only used in x86_64, but left for simplicity */
96 
97 #ifdef TARGET_X86_64
98     uint8_t rex_r;
99     uint8_t rex_x;
100     uint8_t rex_b;
101     bool rex_w;
102 #endif
103     bool jmp_opt; /* use direct block chaining for direct jumps */
104     bool repz_opt; /* optimize jumps within repz instructions */
105     bool cc_op_dirty;
106 
107     CCOp cc_op;  /* current CC operation */
108     int mem_index; /* select memory access functions */
109     uint32_t flags; /* all execution flags */
110     int cpuid_features;
111     int cpuid_ext_features;
112     int cpuid_ext2_features;
113     int cpuid_ext3_features;
114     int cpuid_7_0_ebx_features;
115     int cpuid_xsave_features;
116 
117     /* TCG local temps */
118     TCGv cc_srcT;
119     TCGv A0;
120     TCGv T0;
121     TCGv T1;
122 
123     /* TCG local register indexes (only used inside old micro ops) */
124     TCGv tmp0;
125     TCGv tmp4;
126     TCGv_ptr ptr0;
127     TCGv_ptr ptr1;
128     TCGv_i32 tmp2_i32;
129     TCGv_i32 tmp3_i32;
130     TCGv_i64 tmp1_i64;
131 
132     sigjmp_buf jmpbuf;
133 } DisasContext;
134 
/*
 * Execution-mode predicates.  User-only builds run in a constrained
 * environment, so several of them fold to constants there; otherwise
 * they are read from the DisasContext.
 */
#ifdef CONFIG_USER_ONLY
#define PE(S)     true
#define CPL(S)    3
#define IOPL(S)   0
#define SVME(S)   false
#define GUEST(S)  false
#else
#define PE(S)     (!!((S)->flags & HF_PE_MASK))
#define CPL(S)    ((S)->cpl)
#define IOPL(S)   ((S)->iopl)
#define SVME(S)   (!!((S)->flags & HF_SVME_MASK))
#define GUEST(S)  (!!((S)->flags & HF_GUEST_MASK))
#endif
#if defined(CONFIG_USER_ONLY) && defined(TARGET_X86_64)
#define VM86(S)   false
#define CODE32(S) true
#define SS32(S)   true
#define ADDSEG(S) false
#else
#define VM86(S)   (!!((S)->flags & HF_VM_MASK))
#define CODE32(S) (!!((S)->flags & HF_CS32_MASK))
#define SS32(S)   (!!((S)->flags & HF_SS32_MASK))
#define ADDSEG(S) (!!((S)->flags & HF_ADDSEG_MASK))
#endif
#if !defined(TARGET_X86_64)
#define CODE64(S) false
#define LMA(S)    false
#elif defined(CONFIG_USER_ONLY)
#define CODE64(S) true
#define LMA(S)    true
#else
#define CODE64(S) (!!((S)->flags & HF_CS64_MASK))
#define LMA(S)    (!!((S)->flags & HF_LMA_MASK))
#endif
170 
/* REX accessors; constants when the target is not x86_64. */
#ifndef TARGET_X86_64
#define REX_PREFIX(S)  false
#define REX_W(S)       false
#define REX_R(S)       0
#define REX_X(S)       0
#define REX_B(S)       0
#else
#define REX_PREFIX(S)  (((S)->prefix & PREFIX_REX) != 0)
#define REX_W(S)       ((S)->rex_w)
#define REX_R(S)       ((S)->rex_r + 0)
#define REX_X(S)       ((S)->rex_x + 0)
#define REX_B(S)       ((S)->rex_b + 0)
#endif
184 
/*
 * User-only builds cannot reach many of the sysemu-only helpers.
 * Rather than scattering ifdefs through the translator or supplying
 * the helper functions, define generator stubs whose bodies consist
 * solely of qemu_build_not_reached().
 */
#define STUB_HELPER(NAME, ...)                          \
    static inline void gen_helper_##NAME(__VA_ARGS__)   \
    {                                                   \
        qemu_build_not_reached();                       \
    }

#ifdef CONFIG_USER_ONLY
STUB_HELPER(clgi, TCGv_env env)
STUB_HELPER(flush_page, TCGv_env env, TCGv addr)
STUB_HELPER(hlt, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(inb, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inw, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inl, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(monitor, TCGv_env env, TCGv addr)
STUB_HELPER(mwait, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(outb, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outw, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outl, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(rdmsr, TCGv_env env)
STUB_HELPER(read_crN, TCGv ret, TCGv_env env, TCGv_i32 reg)
STUB_HELPER(get_dr, TCGv ret, TCGv_env env, TCGv_i32 reg)
STUB_HELPER(set_dr, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(stgi, TCGv_env env)
STUB_HELPER(svm_check_intercept, TCGv_env env, TCGv_i32 type)
STUB_HELPER(vmload, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(vmmcall, TCGv_env env)
STUB_HELPER(vmrun, TCGv_env env, TCGv_i32 aflag, TCGv_i32 pc_ofs)
STUB_HELPER(vmsave, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(write_crN, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(wrmsr, TCGv_env env)
#endif
219 
220 static void gen_eob(DisasContext *s);
221 static void gen_jr(DisasContext *s, TCGv dest);
222 static void gen_jmp(DisasContext *s, target_ulong eip);
223 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
224 static void gen_op(DisasContext *s1, int op, MemOp ot, int d);
225 static void gen_exception_gpf(DisasContext *s);
226 
/* i386 arith/logic operations, numbered explicitly */
enum {
    OP_ADDL = 0,
    OP_ORL  = 1,
    OP_ADCL = 2,
    OP_SBBL = 3,
    OP_ANDL = 4,
    OP_SUBL = 5,
    OP_XORL = 6,
    OP_CMPL = 7,
};
238 
/* i386 shift ops, numbered explicitly */
enum {
    OP_ROL  = 0,
    OP_ROR  = 1,
    OP_RCL  = 2,
    OP_RCR  = 3,
    OP_SHL  = 4,
    OP_SHR  = 5,
    OP_SHL1 = 6, /* undocumented */
    OP_SAR  = 7,
};
250 
/* Condition selectors; gen_prepare_cc derives one from (b >> 1) & 7. */
enum {
    JCC_O  = 0,
    JCC_B  = 1,
    JCC_Z  = 2,
    JCC_BE = 3,
    JCC_S  = 4,
    JCC_P  = 5,
    JCC_L  = 6,
    JCC_LE = 7,
};
261 
/* Operand selectors: the eight integer registers plus pseudo operands. */
enum {
    /* I386 int registers */
    OR_EAX = 0,   /* MUST be even numbered */
    OR_ECX = 1,
    OR_EDX = 2,
    OR_EBX = 3,
    OR_ESP = 4,
    OR_EBP = 5,
    OR_ESI = 6,
    OR_EDI = 7,

    OR_TMP0 = 16, /* temporary operand register */
    OR_TMP1 = 17,
    OR_A0   = 18, /* temporary register used when doing address evaluation */
};
277 
/* Bit flags naming which condition-code inputs a CC_OP keeps live. */
enum {
    USES_CC_DST  = 1 << 0,
    USES_CC_SRC  = 1 << 1,
    USES_CC_SRC2 = 1 << 2,
    USES_CC_SRCT = 1 << 3,
};
284 
285 /* Bit set if the global variable is live after setting CC_OP to X.  */
286 static const uint8_t cc_op_live[CC_OP_NB] = {
287     [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
288     [CC_OP_EFLAGS] = USES_CC_SRC,
289     [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
290     [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
291     [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
292     [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
293     [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
294     [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
295     [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
296     [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
297     [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
298     [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
299     [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
300     [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
301     [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
302     [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
303     [CC_OP_CLR] = 0,
304     [CC_OP_POPCNT] = USES_CC_SRC,
305 };
306 
307 static void set_cc_op(DisasContext *s, CCOp op)
308 {
309     int dead;
310 
311     if (s->cc_op == op) {
312         return;
313     }
314 
315     /* Discard CC computation that will no longer be used.  */
316     dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
317     if (dead & USES_CC_DST) {
318         tcg_gen_discard_tl(cpu_cc_dst);
319     }
320     if (dead & USES_CC_SRC) {
321         tcg_gen_discard_tl(cpu_cc_src);
322     }
323     if (dead & USES_CC_SRC2) {
324         tcg_gen_discard_tl(cpu_cc_src2);
325     }
326     if (dead & USES_CC_SRCT) {
327         tcg_gen_discard_tl(s->cc_srcT);
328     }
329 
330     if (op == CC_OP_DYNAMIC) {
331         /* The DYNAMIC setting is translator only, and should never be
332            stored.  Thus we always consider it clean.  */
333         s->cc_op_dirty = false;
334     } else {
335         /* Discard any computed CC_OP value (see shifts).  */
336         if (s->cc_op == CC_OP_DYNAMIC) {
337             tcg_gen_discard_i32(cpu_cc_op);
338         }
339         s->cc_op_dirty = true;
340     }
341     s->cc_op = op;
342 }
343 
344 static void gen_update_cc_op(DisasContext *s)
345 {
346     if (s->cc_op_dirty) {
347         tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
348         s->cc_op_dirty = false;
349     }
350 }
351 
/* Number of operand sizes: 4 with TARGET_X86_64, otherwise 3. */
#ifndef TARGET_X86_64
#define NB_OP_SIZES 3
#else /* TARGET_X86_64 */
#define NB_OP_SIZES 4
#endif /* TARGET_X86_64 */
361 
/* Byte offsets of sub-register fields within a target_ulong on the host. */
#if !defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#else
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#endif
375 
376 /* In instruction encodings for byte register accesses the
377  * register number usually indicates "low 8 bits of register N";
378  * however there are some special cases where N 4..7 indicates
379  * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
380  * true for this special case, false otherwise.
381  */
382 static inline bool byte_reg_is_xH(DisasContext *s, int reg)
383 {
384     /* Any time the REX prefix is present, byte registers are uniform */
385     if (reg < 4 || REX_PREFIX(s)) {
386         return false;
387     }
388     return true;
389 }
390 
391 /* Select the size of a push/pop operation.  */
392 static inline MemOp mo_pushpop(DisasContext *s, MemOp ot)
393 {
394     if (CODE64(s)) {
395         return ot == MO_16 ? MO_16 : MO_64;
396     } else {
397         return ot;
398     }
399 }
400 
401 /* Select the size of the stack pointer.  */
402 static inline MemOp mo_stacksize(DisasContext *s)
403 {
404     return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
405 }
406 
407 /* Select only size 64 else 32.  Used for SSE operand sizes.  */
408 static inline MemOp mo_64_32(MemOp ot)
409 {
410 #ifdef TARGET_X86_64
411     return ot == MO_64 ? MO_64 : MO_32;
412 #else
413     return MO_32;
414 #endif
415 }
416 
417 /* Select size 8 if lsb of B is clear, else OT.  Used for decoding
418    byte vs word opcodes.  */
419 static inline MemOp mo_b_d(int b, MemOp ot)
420 {
421     return b & 1 ? ot : MO_8;
422 }
423 
424 /* Select size 8 if lsb of B is clear, else OT capped at 32.
425    Used for decoding operand size of port opcodes.  */
426 static inline MemOp mo_b_d32(int b, MemOp ot)
427 {
428     return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
429 }
430 
431 static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0)
432 {
433     switch(ot) {
434     case MO_8:
435         if (!byte_reg_is_xH(s, reg)) {
436             tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
437         } else {
438             tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
439         }
440         break;
441     case MO_16:
442         tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
443         break;
444     case MO_32:
445         /* For x86_64, this sets the higher half of register to zero.
446            For i386, this is equivalent to a mov. */
447         tcg_gen_ext32u_tl(cpu_regs[reg], t0);
448         break;
449 #ifdef TARGET_X86_64
450     case MO_64:
451         tcg_gen_mov_tl(cpu_regs[reg], t0);
452         break;
453 #endif
454     default:
455         tcg_abort();
456     }
457 }
458 
459 static inline
460 void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg)
461 {
462     if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
463         tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
464     } else {
465         tcg_gen_mov_tl(t0, cpu_regs[reg]);
466     }
467 }
468 
469 static void gen_add_A0_im(DisasContext *s, int val)
470 {
471     tcg_gen_addi_tl(s->A0, s->A0, val);
472     if (!CODE64(s)) {
473         tcg_gen_ext32u_tl(s->A0, s->A0);
474     }
475 }
476 
477 static inline void gen_op_jmp_v(TCGv dest)
478 {
479     tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
480 }
481 
482 static inline
483 void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val)
484 {
485     tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
486     gen_op_mov_reg_v(s, size, reg, s->tmp0);
487 }
488 
489 static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg)
490 {
491     tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
492     gen_op_mov_reg_v(s, size, reg, s->tmp0);
493 }
494 
495 static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
496 {
497     tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
498 }
499 
500 static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
501 {
502     tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
503 }
504 
505 static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
506 {
507     if (d == OR_TMP0) {
508         gen_op_st_v(s, idx, s->T0, s->A0);
509     } else {
510         gen_op_mov_reg_v(s, idx, d, s->T0);
511     }
512 }
513 
514 static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
515 {
516     tcg_gen_movi_tl(s->tmp0, pc);
517     gen_op_jmp_v(s->tmp0);
518 }
519 
520 /* Compute SEG:REG into A0.  SEG is selected from the override segment
521    (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
522    indicate no override.  */
523 static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0,
524                           int def_seg, int ovr_seg)
525 {
526     switch (aflag) {
527 #ifdef TARGET_X86_64
528     case MO_64:
529         if (ovr_seg < 0) {
530             tcg_gen_mov_tl(s->A0, a0);
531             return;
532         }
533         break;
534 #endif
535     case MO_32:
536         /* 32 bit address */
537         if (ovr_seg < 0 && ADDSEG(s)) {
538             ovr_seg = def_seg;
539         }
540         if (ovr_seg < 0) {
541             tcg_gen_ext32u_tl(s->A0, a0);
542             return;
543         }
544         break;
545     case MO_16:
546         /* 16 bit address */
547         tcg_gen_ext16u_tl(s->A0, a0);
548         a0 = s->A0;
549         if (ovr_seg < 0) {
550             if (ADDSEG(s)) {
551                 ovr_seg = def_seg;
552             } else {
553                 return;
554             }
555         }
556         break;
557     default:
558         tcg_abort();
559     }
560 
561     if (ovr_seg >= 0) {
562         TCGv seg = cpu_seg_base[ovr_seg];
563 
564         if (aflag == MO_64) {
565             tcg_gen_add_tl(s->A0, a0, seg);
566         } else if (CODE64(s)) {
567             tcg_gen_ext32u_tl(s->A0, a0);
568             tcg_gen_add_tl(s->A0, s->A0, seg);
569         } else {
570             tcg_gen_add_tl(s->A0, a0, seg);
571             tcg_gen_ext32u_tl(s->A0, s->A0);
572         }
573     }
574 }
575 
576 static inline void gen_string_movl_A0_ESI(DisasContext *s)
577 {
578     gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
579 }
580 
581 static inline void gen_string_movl_A0_EDI(DisasContext *s)
582 {
583     gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
584 }
585 
586 static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)
587 {
588     tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
589     tcg_gen_shli_tl(s->T0, s->T0, ot);
590 };
591 
592 static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
593 {
594     switch (size) {
595     case MO_8:
596         if (sign) {
597             tcg_gen_ext8s_tl(dst, src);
598         } else {
599             tcg_gen_ext8u_tl(dst, src);
600         }
601         return dst;
602     case MO_16:
603         if (sign) {
604             tcg_gen_ext16s_tl(dst, src);
605         } else {
606             tcg_gen_ext16u_tl(dst, src);
607         }
608         return dst;
609 #ifdef TARGET_X86_64
610     case MO_32:
611         if (sign) {
612             tcg_gen_ext32s_tl(dst, src);
613         } else {
614             tcg_gen_ext32u_tl(dst, src);
615         }
616         return dst;
617 #endif
618     default:
619         return src;
620     }
621 }
622 
623 static void gen_extu(MemOp ot, TCGv reg)
624 {
625     gen_ext_tl(reg, reg, ot, false);
626 }
627 
628 static void gen_exts(MemOp ot, TCGv reg)
629 {
630     gen_ext_tl(reg, reg, ot, true);
631 }
632 
633 static inline
634 void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
635 {
636     tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
637     gen_extu(size, s->tmp0);
638     tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
639 }
640 
641 static inline
642 void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
643 {
644     tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
645     gen_extu(size, s->tmp0);
646     tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
647 }
648 
649 static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n)
650 {
651     switch (ot) {
652     case MO_8:
653         gen_helper_inb(v, cpu_env, n);
654         break;
655     case MO_16:
656         gen_helper_inw(v, cpu_env, n);
657         break;
658     case MO_32:
659         gen_helper_inl(v, cpu_env, n);
660         break;
661     default:
662         tcg_abort();
663     }
664 }
665 
666 static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n)
667 {
668     switch (ot) {
669     case MO_8:
670         gen_helper_outb(cpu_env, v, n);
671         break;
672     case MO_16:
673         gen_helper_outw(cpu_env, v, n);
674         break;
675     case MO_32:
676         gen_helper_outl(cpu_env, v, n);
677         break;
678     default:
679         tcg_abort();
680     }
681 }
682 
683 /*
684  * Validate that access to [port, port + 1<<ot) is allowed.
685  * Raise #GP, or VMM exit if not.
686  */
687 static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port,
688                          uint32_t svm_flags)
689 {
690 #ifdef CONFIG_USER_ONLY
691     /*
692      * We do not implement the ioperm(2) syscall, so the TSS check
693      * will always fail.
694      */
695     gen_exception_gpf(s);
696     return false;
697 #else
698     if (PE(s) && (CPL(s) > IOPL(s) || VM86(s))) {
699         gen_helper_check_io(cpu_env, port, tcg_constant_i32(1 << ot));
700     }
701     if (GUEST(s)) {
702         target_ulong cur_eip = s->base.pc_next - s->cs_base;
703         target_ulong next_eip = s->pc - s->cs_base;
704 
705         gen_update_cc_op(s);
706         gen_jmp_im(s, cur_eip);
707         if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
708             svm_flags |= SVM_IOIO_REP_MASK;
709         }
710         svm_flags |= 1 << (SVM_IOIO_SIZE_SHIFT + ot);
711         gen_helper_svm_check_io(cpu_env, port,
712                                 tcg_constant_i32(svm_flags),
713                                 tcg_constant_i32(next_eip - cur_eip));
714     }
715     return true;
716 #endif
717 }
718 
719 static inline void gen_movs(DisasContext *s, MemOp ot)
720 {
721     gen_string_movl_A0_ESI(s);
722     gen_op_ld_v(s, ot, s->T0, s->A0);
723     gen_string_movl_A0_EDI(s);
724     gen_op_st_v(s, ot, s->T0, s->A0);
725     gen_op_movl_T0_Dshift(s, ot);
726     gen_op_add_reg_T0(s, s->aflag, R_ESI);
727     gen_op_add_reg_T0(s, s->aflag, R_EDI);
728 }
729 
730 static void gen_op_update1_cc(DisasContext *s)
731 {
732     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
733 }
734 
735 static void gen_op_update2_cc(DisasContext *s)
736 {
737     tcg_gen_mov_tl(cpu_cc_src, s->T1);
738     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
739 }
740 
741 static void gen_op_update3_cc(DisasContext *s, TCGv reg)
742 {
743     tcg_gen_mov_tl(cpu_cc_src2, reg);
744     tcg_gen_mov_tl(cpu_cc_src, s->T1);
745     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
746 }
747 
748 static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
749 {
750     tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
751 }
752 
753 static void gen_op_update_neg_cc(DisasContext *s)
754 {
755     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
756     tcg_gen_neg_tl(cpu_cc_src, s->T0);
757     tcg_gen_movi_tl(s->cc_srcT, 0);
758 }
759 
760 /* compute all eflags to cc_src */
761 static void gen_compute_eflags(DisasContext *s)
762 {
763     TCGv zero, dst, src1, src2;
764     int live, dead;
765 
766     if (s->cc_op == CC_OP_EFLAGS) {
767         return;
768     }
769     if (s->cc_op == CC_OP_CLR) {
770         tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
771         set_cc_op(s, CC_OP_EFLAGS);
772         return;
773     }
774 
775     zero = NULL;
776     dst = cpu_cc_dst;
777     src1 = cpu_cc_src;
778     src2 = cpu_cc_src2;
779 
780     /* Take care to not read values that are not live.  */
781     live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
782     dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
783     if (dead) {
784         zero = tcg_const_tl(0);
785         if (dead & USES_CC_DST) {
786             dst = zero;
787         }
788         if (dead & USES_CC_SRC) {
789             src1 = zero;
790         }
791         if (dead & USES_CC_SRC2) {
792             src2 = zero;
793         }
794     }
795 
796     gen_update_cc_op(s);
797     gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
798     set_cc_op(s, CC_OP_EFLAGS);
799 
800     if (dead) {
801         tcg_temp_free(zero);
802     }
803 }
804 
805 typedef struct CCPrepare {
806     TCGCond cond;
807     TCGv reg;
808     TCGv reg2;
809     target_ulong imm;
810     target_ulong mask;
811     bool use_reg2;
812     bool no_setcond;
813 } CCPrepare;
814 
815 /* compute eflags.C to reg */
816 static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
817 {
818     TCGv t0, t1;
819     int size, shift;
820 
821     switch (s->cc_op) {
822     case CC_OP_SUBB ... CC_OP_SUBQ:
823         /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
824         size = s->cc_op - CC_OP_SUBB;
825         t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
826         /* If no temporary was used, be careful not to alias t1 and t0.  */
827         t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
828         tcg_gen_mov_tl(t0, s->cc_srcT);
829         gen_extu(size, t0);
830         goto add_sub;
831 
832     case CC_OP_ADDB ... CC_OP_ADDQ:
833         /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
834         size = s->cc_op - CC_OP_ADDB;
835         t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
836         t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
837     add_sub:
838         return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
839                              .reg2 = t1, .mask = -1, .use_reg2 = true };
840 
841     case CC_OP_LOGICB ... CC_OP_LOGICQ:
842     case CC_OP_CLR:
843     case CC_OP_POPCNT:
844         return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
845 
846     case CC_OP_INCB ... CC_OP_INCQ:
847     case CC_OP_DECB ... CC_OP_DECQ:
848         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
849                              .mask = -1, .no_setcond = true };
850 
851     case CC_OP_SHLB ... CC_OP_SHLQ:
852         /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
853         size = s->cc_op - CC_OP_SHLB;
854         shift = (8 << size) - 1;
855         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
856                              .mask = (target_ulong)1 << shift };
857 
858     case CC_OP_MULB ... CC_OP_MULQ:
859         return (CCPrepare) { .cond = TCG_COND_NE,
860                              .reg = cpu_cc_src, .mask = -1 };
861 
862     case CC_OP_BMILGB ... CC_OP_BMILGQ:
863         size = s->cc_op - CC_OP_BMILGB;
864         t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
865         return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
866 
867     case CC_OP_ADCX:
868     case CC_OP_ADCOX:
869         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
870                              .mask = -1, .no_setcond = true };
871 
872     case CC_OP_EFLAGS:
873     case CC_OP_SARB ... CC_OP_SARQ:
874         /* CC_SRC & 1 */
875         return (CCPrepare) { .cond = TCG_COND_NE,
876                              .reg = cpu_cc_src, .mask = CC_C };
877 
878     default:
879        /* The need to compute only C from CC_OP_DYNAMIC is important
880           in efficiently implementing e.g. INC at the start of a TB.  */
881        gen_update_cc_op(s);
882        gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
883                                cpu_cc_src2, cpu_cc_op);
884        return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
885                             .mask = -1, .no_setcond = true };
886     }
887 }
888 
889 /* compute eflags.P to reg */
890 static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
891 {
892     gen_compute_eflags(s);
893     return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
894                          .mask = CC_P };
895 }
896 
897 /* compute eflags.S to reg */
898 static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
899 {
900     switch (s->cc_op) {
901     case CC_OP_DYNAMIC:
902         gen_compute_eflags(s);
903         /* FALLTHRU */
904     case CC_OP_EFLAGS:
905     case CC_OP_ADCX:
906     case CC_OP_ADOX:
907     case CC_OP_ADCOX:
908         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
909                              .mask = CC_S };
910     case CC_OP_CLR:
911     case CC_OP_POPCNT:
912         return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
913     default:
914         {
915             MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
916             TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
917             return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
918         }
919     }
920 }
921 
922 /* compute eflags.O to reg */
923 static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
924 {
925     switch (s->cc_op) {
926     case CC_OP_ADOX:
927     case CC_OP_ADCOX:
928         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
929                              .mask = -1, .no_setcond = true };
930     case CC_OP_CLR:
931     case CC_OP_POPCNT:
932         return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
933     default:
934         gen_compute_eflags(s);
935         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
936                              .mask = CC_O };
937     }
938 }
939 
940 /* compute eflags.Z to reg */
941 static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
942 {
943     switch (s->cc_op) {
944     case CC_OP_DYNAMIC:
945         gen_compute_eflags(s);
946         /* FALLTHRU */
947     case CC_OP_EFLAGS:
948     case CC_OP_ADCX:
949     case CC_OP_ADOX:
950     case CC_OP_ADCOX:
951         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
952                              .mask = CC_Z };
953     case CC_OP_CLR:
954         return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
955     case CC_OP_POPCNT:
956         return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
957                              .mask = -1 };
958     default:
959         {
960             MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
961             TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
962             return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
963         }
964     }
965 }
966 
/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
969 static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
970 {
971     int inv, jcc_op, cond;
972     MemOp size;
973     CCPrepare cc;
974     TCGv t0;
975 
976     inv = b & 1;
977     jcc_op = (b >> 1) & 7;
978 
979     switch (s->cc_op) {
980     case CC_OP_SUBB ... CC_OP_SUBQ:
981         /* We optimize relational operators for the cmp/jcc case.  */
982         size = s->cc_op - CC_OP_SUBB;
983         switch (jcc_op) {
984         case JCC_BE:
985             tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
986             gen_extu(size, s->tmp4);
987             t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
988             cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
989                                .reg2 = t0, .mask = -1, .use_reg2 = true };
990             break;
991 
992         case JCC_L:
993             cond = TCG_COND_LT;
994             goto fast_jcc_l;
995         case JCC_LE:
996             cond = TCG_COND_LE;
997         fast_jcc_l:
998             tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
999             gen_exts(size, s->tmp4);
1000             t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
1001             cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
1002                                .reg2 = t0, .mask = -1, .use_reg2 = true };
1003             break;
1004 
1005         default:
1006             goto slow_jcc;
1007         }
1008         break;
1009 
1010     default:
1011     slow_jcc:
1012         /* This actually generates good code for JC, JZ and JS.  */
1013         switch (jcc_op) {
1014         case JCC_O:
1015             cc = gen_prepare_eflags_o(s, reg);
1016             break;
1017         case JCC_B:
1018             cc = gen_prepare_eflags_c(s, reg);
1019             break;
1020         case JCC_Z:
1021             cc = gen_prepare_eflags_z(s, reg);
1022             break;
1023         case JCC_BE:
1024             gen_compute_eflags(s);
1025             cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
1026                                .mask = CC_Z | CC_C };
1027             break;
1028         case JCC_S:
1029             cc = gen_prepare_eflags_s(s, reg);
1030             break;
1031         case JCC_P:
1032             cc = gen_prepare_eflags_p(s, reg);
1033             break;
1034         case JCC_L:
1035             gen_compute_eflags(s);
1036             if (reg == cpu_cc_src) {
1037                 reg = s->tmp0;
1038             }
1039             tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
1040             tcg_gen_xor_tl(reg, reg, cpu_cc_src);
1041             cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
1042                                .mask = CC_S };
1043             break;
1044         default:
1045         case JCC_LE:
1046             gen_compute_eflags(s);
1047             if (reg == cpu_cc_src) {
1048                 reg = s->tmp0;
1049             }
1050             tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
1051             tcg_gen_xor_tl(reg, reg, cpu_cc_src);
1052             cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
1053                                .mask = CC_S | CC_Z };
1054             break;
1055         }
1056         break;
1057     }
1058 
1059     if (inv) {
1060         cc.cond = tcg_invert_cond(cc.cond);
1061     }
1062     return cc;
1063 }
1064 
1065 static void gen_setcc1(DisasContext *s, int b, TCGv reg)
1066 {
1067     CCPrepare cc = gen_prepare_cc(s, b, reg);
1068 
1069     if (cc.no_setcond) {
1070         if (cc.cond == TCG_COND_EQ) {
1071             tcg_gen_xori_tl(reg, cc.reg, 1);
1072         } else {
1073             tcg_gen_mov_tl(reg, cc.reg);
1074         }
1075         return;
1076     }
1077 
1078     if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
1079         cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
1080         tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
1081         tcg_gen_andi_tl(reg, reg, 1);
1082         return;
1083     }
1084     if (cc.mask != -1) {
1085         tcg_gen_andi_tl(reg, cc.reg, cc.mask);
1086         cc.reg = reg;
1087     }
1088     if (cc.use_reg2) {
1089         tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
1090     } else {
1091         tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
1092     }
1093 }
1094 
1095 static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
1096 {
1097     gen_setcc1(s, JCC_B << 1, reg);
1098 }
1099 
1100 /* generate a conditional jump to label 'l1' according to jump opcode
1101    value 'b'. In the fast case, T0 is guaranted not to be used. */
1102 static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
1103 {
1104     CCPrepare cc = gen_prepare_cc(s, b, s->T0);
1105 
1106     if (cc.mask != -1) {
1107         tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
1108         cc.reg = s->T0;
1109     }
1110     if (cc.use_reg2) {
1111         tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1112     } else {
1113         tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1114     }
1115 }
1116 
1117 /* Generate a conditional jump to label 'l1' according to jump opcode
1118    value 'b'. In the fast case, T0 is guaranted not to be used.
1119    A translation block must end soon.  */
1120 static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
1121 {
1122     CCPrepare cc = gen_prepare_cc(s, b, s->T0);
1123 
1124     gen_update_cc_op(s);
1125     if (cc.mask != -1) {
1126         tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
1127         cc.reg = s->T0;
1128     }
1129     set_cc_op(s, CC_OP_DYNAMIC);
1130     if (cc.use_reg2) {
1131         tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1132     } else {
1133         tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1134     }
1135 }
1136 
1137 /* XXX: does not work with gdbstub "ice" single step - not a
1138    serious problem */
1139 static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
1140 {
1141     TCGLabel *l1 = gen_new_label();
1142     TCGLabel *l2 = gen_new_label();
1143     gen_op_jnz_ecx(s, s->aflag, l1);
1144     gen_set_label(l2);
1145     gen_jmp_tb(s, next_eip, 1);
1146     gen_set_label(l1);
1147     return l2;
1148 }
1149 
1150 static inline void gen_stos(DisasContext *s, MemOp ot)
1151 {
1152     gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
1153     gen_string_movl_A0_EDI(s);
1154     gen_op_st_v(s, ot, s->T0, s->A0);
1155     gen_op_movl_T0_Dshift(s, ot);
1156     gen_op_add_reg_T0(s, s->aflag, R_EDI);
1157 }
1158 
1159 static inline void gen_lods(DisasContext *s, MemOp ot)
1160 {
1161     gen_string_movl_A0_ESI(s);
1162     gen_op_ld_v(s, ot, s->T0, s->A0);
1163     gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
1164     gen_op_movl_T0_Dshift(s, ot);
1165     gen_op_add_reg_T0(s, s->aflag, R_ESI);
1166 }
1167 
1168 static inline void gen_scas(DisasContext *s, MemOp ot)
1169 {
1170     gen_string_movl_A0_EDI(s);
1171     gen_op_ld_v(s, ot, s->T1, s->A0);
1172     gen_op(s, OP_CMPL, ot, R_EAX);
1173     gen_op_movl_T0_Dshift(s, ot);
1174     gen_op_add_reg_T0(s, s->aflag, R_EDI);
1175 }
1176 
1177 static inline void gen_cmps(DisasContext *s, MemOp ot)
1178 {
1179     gen_string_movl_A0_EDI(s);
1180     gen_op_ld_v(s, ot, s->T1, s->A0);
1181     gen_string_movl_A0_ESI(s);
1182     gen_op(s, OP_CMPL, ot, OR_TMP0);
1183     gen_op_movl_T0_Dshift(s, ot);
1184     gen_op_add_reg_T0(s, s->aflag, R_ESI);
1185     gen_op_add_reg_T0(s, s->aflag, R_EDI);
1186 }
1187 
1188 static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
1189 {
1190     if (s->flags & HF_IOBPT_MASK) {
1191 #ifdef CONFIG_USER_ONLY
1192         /* user-mode cpu should not be in IOBPT mode */
1193         g_assert_not_reached();
1194 #else
1195         TCGv_i32 t_size = tcg_const_i32(1 << ot);
1196         TCGv t_next = tcg_const_tl(s->pc - s->cs_base);
1197 
1198         gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
1199         tcg_temp_free_i32(t_size);
1200         tcg_temp_free(t_next);
1201 #endif /* CONFIG_USER_ONLY */
1202     }
1203 }
1204 
1205 static inline void gen_ins(DisasContext *s, MemOp ot)
1206 {
1207     gen_string_movl_A0_EDI(s);
1208     /* Note: we must do this dummy write first to be restartable in
1209        case of page fault. */
1210     tcg_gen_movi_tl(s->T0, 0);
1211     gen_op_st_v(s, ot, s->T0, s->A0);
1212     tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
1213     tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
1214     gen_helper_in_func(ot, s->T0, s->tmp2_i32);
1215     gen_op_st_v(s, ot, s->T0, s->A0);
1216     gen_op_movl_T0_Dshift(s, ot);
1217     gen_op_add_reg_T0(s, s->aflag, R_EDI);
1218     gen_bpt_io(s, s->tmp2_i32, ot);
1219 }
1220 
1221 static inline void gen_outs(DisasContext *s, MemOp ot)
1222 {
1223     gen_string_movl_A0_ESI(s);
1224     gen_op_ld_v(s, ot, s->T0, s->A0);
1225 
1226     tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
1227     tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
1228     tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
1229     gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
1230     gen_op_movl_T0_Dshift(s, ot);
1231     gen_op_add_reg_T0(s, s->aflag, R_ESI);
1232     gen_bpt_io(s, s->tmp2_i32, ot);
1233 }
1234 
/*
 * Same method as Valgrind: we generate jumps to the current or next
 * instruction.
 *
 * GEN_REPZ(op) defines gen_repz_<op>(), which emits a single iteration of
 * a REP-prefixed string instruction on top of gen_<op>(): leave for
 * next_eip when ECX is zero, otherwise do one step, decrement ECX and
 * jump back to cur_eip.  With s->repz_opt set, an ECX-became-zero check
 * is emitted right after the decrement as well.
 */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,                 \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip)                     \
{                                                                             \
    TCGLabel *done;                                                           \
    gen_update_cc_op(s);                                                      \
    done = gen_jz_ecx_string(s, next_eip);                                    \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt) {                                                        \
        gen_op_jz_ecx(s, s->aflag, done);                                     \
    }                                                                         \
    gen_jmp(s, cur_eip);                                                      \
}
1252 
/*
 * GEN_REPZ2(op) defines gen_repz_<op>() for string instructions whose
 * repetition also depends on the Z condition.  It emits one iteration:
 * leave for next_eip when ECX is zero, otherwise do one gen_<op>() step,
 * decrement ECX, and branch to the exit label on the JCC_Z test (inverted
 * when 'nz' is 0) before jumping back to cur_eip.  With s->repz_opt set,
 * an ECX-became-zero check is emitted as well.
 */
#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,                 \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *done;                                                           \
    gen_update_cc_op(s);                                                      \
    done = gen_jz_ecx_string(s, next_eip);                                    \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), done);                               \
    if (s->repz_opt) {                                                        \
        gen_op_jz_ecx(s, s->aflag, done);                                     \
    }                                                                         \
    gen_jmp(s, cur_eip);                                                      \
}
1270 
/*
 * Instantiate the gen_repz_<op>() emitters.  GEN_REPZ gives the
 * four-argument form; GEN_REPZ2 adds the 'nz' argument for the
 * compare-style instructions (scas, cmps).
 */
GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)
1278 
1279 static void gen_helper_fp_arith_ST0_FT0(int op)
1280 {
1281     switch (op) {
1282     case 0:
1283         gen_helper_fadd_ST0_FT0(cpu_env);
1284         break;
1285     case 1:
1286         gen_helper_fmul_ST0_FT0(cpu_env);
1287         break;
1288     case 2:
1289         gen_helper_fcom_ST0_FT0(cpu_env);
1290         break;
1291     case 3:
1292         gen_helper_fcom_ST0_FT0(cpu_env);
1293         break;
1294     case 4:
1295         gen_helper_fsub_ST0_FT0(cpu_env);
1296         break;
1297     case 5:
1298         gen_helper_fsubr_ST0_FT0(cpu_env);
1299         break;
1300     case 6:
1301         gen_helper_fdiv_ST0_FT0(cpu_env);
1302         break;
1303     case 7:
1304         gen_helper_fdivr_ST0_FT0(cpu_env);
1305         break;
1306     }
1307 }
1308 
1309 /* NOTE the exception in "r" op ordering */
1310 static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
1311 {
1312     TCGv_i32 tmp = tcg_const_i32(opreg);
1313     switch (op) {
1314     case 0:
1315         gen_helper_fadd_STN_ST0(cpu_env, tmp);
1316         break;
1317     case 1:
1318         gen_helper_fmul_STN_ST0(cpu_env, tmp);
1319         break;
1320     case 4:
1321         gen_helper_fsubr_STN_ST0(cpu_env, tmp);
1322         break;
1323     case 5:
1324         gen_helper_fsub_STN_ST0(cpu_env, tmp);
1325         break;
1326     case 6:
1327         gen_helper_fdivr_STN_ST0(cpu_env, tmp);
1328         break;
1329     case 7:
1330         gen_helper_fdiv_STN_ST0(cpu_env, tmp);
1331         break;
1332     }
1333 }
1334 
1335 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
1336 {
1337     gen_update_cc_op(s);
1338     gen_jmp_im(s, cur_eip);
1339     gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
1340     s->base.is_jmp = DISAS_NORETURN;
1341 }
1342 
1343 /* Generate #UD for the current instruction.  The assumption here is that
1344    the instruction is known, but it isn't allowed in the current cpu mode.  */
1345 static void gen_illegal_opcode(DisasContext *s)
1346 {
1347     gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
1348 }
1349 
1350 /* Generate #GP for the current instruction. */
1351 static void gen_exception_gpf(DisasContext *s)
1352 {
1353     gen_exception(s, EXCP0D_GPF, s->pc_start - s->cs_base);
1354 }
1355 
1356 /* Check for cpl == 0; if not, raise #GP and return false. */
1357 static bool check_cpl0(DisasContext *s)
1358 {
1359     if (CPL(s) == 0) {
1360         return true;
1361     }
1362     gen_exception_gpf(s);
1363     return false;
1364 }
1365 
1366 /* If vm86, check for iopl == 3; if not, raise #GP and return false. */
1367 static bool check_vm86_iopl(DisasContext *s)
1368 {
1369     if (!VM86(s) || IOPL(s) == 3) {
1370         return true;
1371     }
1372     gen_exception_gpf(s);
1373     return false;
1374 }
1375 
1376 /* Check for iopl allowing access; if not, raise #GP and return false. */
1377 static bool check_iopl(DisasContext *s)
1378 {
1379     if (VM86(s) ? IOPL(s) == 3 : CPL(s) <= IOPL(s)) {
1380         return true;
1381     }
1382     gen_exception_gpf(s);
1383     return false;
1384 }
1385 
1386 /* if d == OR_TMP0, it means memory operand (address in A0) */
1387 static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
1388 {
1389     if (d != OR_TMP0) {
1390         if (s1->prefix & PREFIX_LOCK) {
1391             /* Lock prefix when destination is not memory.  */
1392             gen_illegal_opcode(s1);
1393             return;
1394         }
1395         gen_op_mov_v_reg(s1, ot, s1->T0, d);
1396     } else if (!(s1->prefix & PREFIX_LOCK)) {
1397         gen_op_ld_v(s1, ot, s1->T0, s1->A0);
1398     }
1399     switch(op) {
1400     case OP_ADCL:
1401         gen_compute_eflags_c(s1, s1->tmp4);
1402         if (s1->prefix & PREFIX_LOCK) {
1403             tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
1404             tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
1405                                         s1->mem_index, ot | MO_LE);
1406         } else {
1407             tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
1408             tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
1409             gen_op_st_rm_T0_A0(s1, ot, d);
1410         }
1411         gen_op_update3_cc(s1, s1->tmp4);
1412         set_cc_op(s1, CC_OP_ADCB + ot);
1413         break;
1414     case OP_SBBL:
1415         gen_compute_eflags_c(s1, s1->tmp4);
1416         if (s1->prefix & PREFIX_LOCK) {
1417             tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
1418             tcg_gen_neg_tl(s1->T0, s1->T0);
1419             tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
1420                                         s1->mem_index, ot | MO_LE);
1421         } else {
1422             tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
1423             tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
1424             gen_op_st_rm_T0_A0(s1, ot, d);
1425         }
1426         gen_op_update3_cc(s1, s1->tmp4);
1427         set_cc_op(s1, CC_OP_SBBB + ot);
1428         break;
1429     case OP_ADDL:
1430         if (s1->prefix & PREFIX_LOCK) {
1431             tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
1432                                         s1->mem_index, ot | MO_LE);
1433         } else {
1434             tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
1435             gen_op_st_rm_T0_A0(s1, ot, d);
1436         }
1437         gen_op_update2_cc(s1);
1438         set_cc_op(s1, CC_OP_ADDB + ot);
1439         break;
1440     case OP_SUBL:
1441         if (s1->prefix & PREFIX_LOCK) {
1442             tcg_gen_neg_tl(s1->T0, s1->T1);
1443             tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
1444                                         s1->mem_index, ot | MO_LE);
1445             tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
1446         } else {
1447             tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
1448             tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
1449             gen_op_st_rm_T0_A0(s1, ot, d);
1450         }
1451         gen_op_update2_cc(s1);
1452         set_cc_op(s1, CC_OP_SUBB + ot);
1453         break;
1454     default:
1455     case OP_ANDL:
1456         if (s1->prefix & PREFIX_LOCK) {
1457             tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
1458                                         s1->mem_index, ot | MO_LE);
1459         } else {
1460             tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
1461             gen_op_st_rm_T0_A0(s1, ot, d);
1462         }
1463         gen_op_update1_cc(s1);
1464         set_cc_op(s1, CC_OP_LOGICB + ot);
1465         break;
1466     case OP_ORL:
1467         if (s1->prefix & PREFIX_LOCK) {
1468             tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
1469                                        s1->mem_index, ot | MO_LE);
1470         } else {
1471             tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
1472             gen_op_st_rm_T0_A0(s1, ot, d);
1473         }
1474         gen_op_update1_cc(s1);
1475         set_cc_op(s1, CC_OP_LOGICB + ot);
1476         break;
1477     case OP_XORL:
1478         if (s1->prefix & PREFIX_LOCK) {
1479             tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
1480                                         s1->mem_index, ot | MO_LE);
1481         } else {
1482             tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
1483             gen_op_st_rm_T0_A0(s1, ot, d);
1484         }
1485         gen_op_update1_cc(s1);
1486         set_cc_op(s1, CC_OP_LOGICB + ot);
1487         break;
1488     case OP_CMPL:
1489         tcg_gen_mov_tl(cpu_cc_src, s1->T1);
1490         tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
1491         tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
1492         set_cc_op(s1, CC_OP_SUBB + ot);
1493         break;
1494     }
1495 }
1496 
1497 /* if d == OR_TMP0, it means memory operand (address in A0) */
1498 static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
1499 {
1500     if (s1->prefix & PREFIX_LOCK) {
1501         if (d != OR_TMP0) {
1502             /* Lock prefix when destination is not memory */
1503             gen_illegal_opcode(s1);
1504             return;
1505         }
1506         tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
1507         tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
1508                                     s1->mem_index, ot | MO_LE);
1509     } else {
1510         if (d != OR_TMP0) {
1511             gen_op_mov_v_reg(s1, ot, s1->T0, d);
1512         } else {
1513             gen_op_ld_v(s1, ot, s1->T0, s1->A0);
1514         }
1515         tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
1516         gen_op_st_rm_T0_A0(s1, ot, d);
1517     }
1518 
1519     gen_compute_eflags_c(s1, cpu_cc_src);
1520     tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
1521     set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
1522 }
1523 
1524 static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
1525                             TCGv shm1, TCGv count, bool is_right)
1526 {
1527     TCGv_i32 z32, s32, oldop;
1528     TCGv z_tl;
1529 
1530     /* Store the results into the CC variables.  If we know that the
1531        variable must be dead, store unconditionally.  Otherwise we'll
1532        need to not disrupt the current contents.  */
1533     z_tl = tcg_const_tl(0);
1534     if (cc_op_live[s->cc_op] & USES_CC_DST) {
1535         tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
1536                            result, cpu_cc_dst);
1537     } else {
1538         tcg_gen_mov_tl(cpu_cc_dst, result);
1539     }
1540     if (cc_op_live[s->cc_op] & USES_CC_SRC) {
1541         tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
1542                            shm1, cpu_cc_src);
1543     } else {
1544         tcg_gen_mov_tl(cpu_cc_src, shm1);
1545     }
1546     tcg_temp_free(z_tl);
1547 
1548     /* Get the two potential CC_OP values into temporaries.  */
1549     tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1550     if (s->cc_op == CC_OP_DYNAMIC) {
1551         oldop = cpu_cc_op;
1552     } else {
1553         tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
1554         oldop = s->tmp3_i32;
1555     }
1556 
1557     /* Conditionally store the CC_OP value.  */
1558     z32 = tcg_const_i32(0);
1559     s32 = tcg_temp_new_i32();
1560     tcg_gen_trunc_tl_i32(s32, count);
1561     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
1562     tcg_temp_free_i32(z32);
1563     tcg_temp_free_i32(s32);
1564 
1565     /* The CC_OP value is no longer predictable.  */
1566     set_cc_op(s, CC_OP_DYNAMIC);
1567 }
1568 
1569 static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
1570                             int is_right, int is_arith)
1571 {
1572     target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1573 
1574     /* load */
1575     if (op1 == OR_TMP0) {
1576         gen_op_ld_v(s, ot, s->T0, s->A0);
1577     } else {
1578         gen_op_mov_v_reg(s, ot, s->T0, op1);
1579     }
1580 
1581     tcg_gen_andi_tl(s->T1, s->T1, mask);
1582     tcg_gen_subi_tl(s->tmp0, s->T1, 1);
1583 
1584     if (is_right) {
1585         if (is_arith) {
1586             gen_exts(ot, s->T0);
1587             tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
1588             tcg_gen_sar_tl(s->T0, s->T0, s->T1);
1589         } else {
1590             gen_extu(ot, s->T0);
1591             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1592             tcg_gen_shr_tl(s->T0, s->T0, s->T1);
1593         }
1594     } else {
1595         tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1596         tcg_gen_shl_tl(s->T0, s->T0, s->T1);
1597     }
1598 
1599     /* store */
1600     gen_op_st_rm_T0_A0(s, ot, op1);
1601 
1602     gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
1603 }
1604 
1605 static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1606                             int is_right, int is_arith)
1607 {
1608     int mask = (ot == MO_64 ? 0x3f : 0x1f);
1609 
1610     /* load */
1611     if (op1 == OR_TMP0)
1612         gen_op_ld_v(s, ot, s->T0, s->A0);
1613     else
1614         gen_op_mov_v_reg(s, ot, s->T0, op1);
1615 
1616     op2 &= mask;
1617     if (op2 != 0) {
1618         if (is_right) {
1619             if (is_arith) {
1620                 gen_exts(ot, s->T0);
1621                 tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
1622                 tcg_gen_sari_tl(s->T0, s->T0, op2);
1623             } else {
1624                 gen_extu(ot, s->T0);
1625                 tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
1626                 tcg_gen_shri_tl(s->T0, s->T0, op2);
1627             }
1628         } else {
1629             tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
1630             tcg_gen_shli_tl(s->T0, s->T0, op2);
1631         }
1632     }
1633 
1634     /* store */
1635     gen_op_st_rm_T0_A0(s, ot, op1);
1636 
1637     /* update eflags if non zero shift */
1638     if (op2 != 0) {
1639         tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
1640         tcg_gen_mov_tl(cpu_cc_dst, s->T0);
1641         set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1642     }
1643 }
1644 
1645 static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
1646 {
1647     target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1648     TCGv_i32 t0, t1;
1649 
1650     /* load */
1651     if (op1 == OR_TMP0) {
1652         gen_op_ld_v(s, ot, s->T0, s->A0);
1653     } else {
1654         gen_op_mov_v_reg(s, ot, s->T0, op1);
1655     }
1656 
1657     tcg_gen_andi_tl(s->T1, s->T1, mask);
1658 
1659     switch (ot) {
1660     case MO_8:
1661         /* Replicate the 8-bit input so that a 32-bit rotate works.  */
1662         tcg_gen_ext8u_tl(s->T0, s->T0);
1663         tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
1664         goto do_long;
1665     case MO_16:
1666         /* Replicate the 16-bit input so that a 32-bit rotate works.  */
1667         tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
1668         goto do_long;
1669     do_long:
1670 #ifdef TARGET_X86_64
1671     case MO_32:
1672         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1673         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
1674         if (is_right) {
1675             tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1676         } else {
1677             tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1678         }
1679         tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1680         break;
1681 #endif
1682     default:
1683         if (is_right) {
1684             tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
1685         } else {
1686             tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
1687         }
1688         break;
1689     }
1690 
1691     /* store */
1692     gen_op_st_rm_T0_A0(s, ot, op1);
1693 
1694     /* We'll need the flags computed into CC_SRC.  */
1695     gen_compute_eflags(s);
1696 
1697     /* The value that was "rotated out" is now present at the other end
1698        of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1699        since we've computed the flags into CC_SRC, these variables are
1700        currently dead.  */
1701     if (is_right) {
1702         tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1703         tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1704         tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1705     } else {
1706         tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1707         tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1708     }
1709     tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1710     tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1711 
1712     /* Now conditionally store the new CC_OP value.  If the shift count
1713        is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
1714        Otherwise reuse CC_OP_ADCOX which have the C and O flags split out
1715        exactly as we computed above.  */
1716     t0 = tcg_const_i32(0);
1717     t1 = tcg_temp_new_i32();
1718     tcg_gen_trunc_tl_i32(t1, s->T1);
1719     tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
1720     tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
1721     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
1722                         s->tmp2_i32, s->tmp3_i32);
1723     tcg_temp_free_i32(t0);
1724     tcg_temp_free_i32(t1);
1725 
1726     /* The CC_OP value is no longer predictable.  */
1727     set_cc_op(s, CC_OP_DYNAMIC);
1728 }
1729 
1730 static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1731                           int is_right)
1732 {
1733     int mask = (ot == MO_64 ? 0x3f : 0x1f);
1734     int shift;
1735 
1736     /* load */
1737     if (op1 == OR_TMP0) {
1738         gen_op_ld_v(s, ot, s->T0, s->A0);
1739     } else {
1740         gen_op_mov_v_reg(s, ot, s->T0, op1);
1741     }
1742 
1743     op2 &= mask;
1744     if (op2 != 0) {
1745         switch (ot) {
1746 #ifdef TARGET_X86_64
1747         case MO_32:
1748             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1749             if (is_right) {
1750                 tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
1751             } else {
1752                 tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
1753             }
1754             tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1755             break;
1756 #endif
1757         default:
1758             if (is_right) {
1759                 tcg_gen_rotri_tl(s->T0, s->T0, op2);
1760             } else {
1761                 tcg_gen_rotli_tl(s->T0, s->T0, op2);
1762             }
1763             break;
1764         case MO_8:
1765             mask = 7;
1766             goto do_shifts;
1767         case MO_16:
1768             mask = 15;
1769         do_shifts:
1770             shift = op2 & mask;
1771             if (is_right) {
1772                 shift = mask + 1 - shift;
1773             }
1774             gen_extu(ot, s->T0);
1775             tcg_gen_shli_tl(s->tmp0, s->T0, shift);
1776             tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
1777             tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
1778             break;
1779         }
1780     }
1781 
1782     /* store */
1783     gen_op_st_rm_T0_A0(s, ot, op1);
1784 
1785     if (op2 != 0) {
1786         /* Compute the flags into CC_SRC.  */
1787         gen_compute_eflags(s);
1788 
1789         /* The value that was "rotated out" is now present at the other end
1790            of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1791            since we've computed the flags into CC_SRC, these variables are
1792            currently dead.  */
1793         if (is_right) {
1794             tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1795             tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1796             tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1797         } else {
1798             tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1799             tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1800         }
1801         tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1802         tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1803         set_cc_op(s, CC_OP_ADCOX);
1804     }
1805 }
1806 
1807 /* XXX: add faster immediate = 1 case */
1808 static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
1809                            int is_right)
1810 {
1811     gen_compute_eflags(s);
1812     assert(s->cc_op == CC_OP_EFLAGS);
1813 
1814     /* load */
1815     if (op1 == OR_TMP0)
1816         gen_op_ld_v(s, ot, s->T0, s->A0);
1817     else
1818         gen_op_mov_v_reg(s, ot, s->T0, op1);
1819 
1820     if (is_right) {
1821         switch (ot) {
1822         case MO_8:
1823             gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1824             break;
1825         case MO_16:
1826             gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1827             break;
1828         case MO_32:
1829             gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1830             break;
1831 #ifdef TARGET_X86_64
1832         case MO_64:
1833             gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1834             break;
1835 #endif
1836         default:
1837             tcg_abort();
1838         }
1839     } else {
1840         switch (ot) {
1841         case MO_8:
1842             gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1843             break;
1844         case MO_16:
1845             gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1846             break;
1847         case MO_32:
1848             gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1849             break;
1850 #ifdef TARGET_X86_64
1851         case MO_64:
1852             gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1853             break;
1854 #endif
1855         default:
1856             tcg_abort();
1857         }
1858     }
1859     /* store */
1860     gen_op_st_rm_T0_A0(s, ot, op1);
1861 }
1862 
1863 /* XXX: add faster immediate case */
1864 static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
1865                              bool is_right, TCGv count_in)
1866 {
1867     target_ulong mask = (ot == MO_64 ? 63 : 31);
1868     TCGv count;
1869 
1870     /* load */
1871     if (op1 == OR_TMP0) {
1872         gen_op_ld_v(s, ot, s->T0, s->A0);
1873     } else {
1874         gen_op_mov_v_reg(s, ot, s->T0, op1);
1875     }
1876 
1877     count = tcg_temp_new();
1878     tcg_gen_andi_tl(count, count_in, mask);
1879 
1880     switch (ot) {
1881     case MO_16:
1882         /* Note: we implement the Intel behaviour for shift count > 16.
1883            This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1884            portion by constructing it as a 32-bit value.  */
1885         if (is_right) {
1886             tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
1887             tcg_gen_mov_tl(s->T1, s->T0);
1888             tcg_gen_mov_tl(s->T0, s->tmp0);
1889         } else {
1890             tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
1891         }
1892         /*
1893          * If TARGET_X86_64 defined then fall through into MO_32 case,
1894          * otherwise fall through default case.
1895          */
1896     case MO_32:
1897 #ifdef TARGET_X86_64
1898         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1899         tcg_gen_subi_tl(s->tmp0, count, 1);
1900         if (is_right) {
1901             tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
1902             tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
1903             tcg_gen_shr_i64(s->T0, s->T0, count);
1904         } else {
1905             tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
1906             tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
1907             tcg_gen_shl_i64(s->T0, s->T0, count);
1908             tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
1909             tcg_gen_shri_i64(s->T0, s->T0, 32);
1910         }
1911         break;
1912 #endif
1913     default:
1914         tcg_gen_subi_tl(s->tmp0, count, 1);
1915         if (is_right) {
1916             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1917 
1918             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1919             tcg_gen_shr_tl(s->T0, s->T0, count);
1920             tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
1921         } else {
1922             tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1923             if (ot == MO_16) {
1924                 /* Only needed if count > 16, for Intel behaviour.  */
1925                 tcg_gen_subfi_tl(s->tmp4, 33, count);
1926                 tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
1927                 tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
1928             }
1929 
1930             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1931             tcg_gen_shl_tl(s->T0, s->T0, count);
1932             tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
1933         }
1934         tcg_gen_movi_tl(s->tmp4, 0);
1935         tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
1936                            s->tmp4, s->T1);
1937         tcg_gen_or_tl(s->T0, s->T0, s->T1);
1938         break;
1939     }
1940 
1941     /* store */
1942     gen_op_st_rm_T0_A0(s, ot, op1);
1943 
1944     gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
1945     tcg_temp_free(count);
1946 }
1947 
1948 static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
1949 {
1950     if (s != OR_TMP1)
1951         gen_op_mov_v_reg(s1, ot, s1->T1, s);
1952     switch(op) {
1953     case OP_ROL:
1954         gen_rot_rm_T1(s1, ot, d, 0);
1955         break;
1956     case OP_ROR:
1957         gen_rot_rm_T1(s1, ot, d, 1);
1958         break;
1959     case OP_SHL:
1960     case OP_SHL1:
1961         gen_shift_rm_T1(s1, ot, d, 0, 0);
1962         break;
1963     case OP_SHR:
1964         gen_shift_rm_T1(s1, ot, d, 1, 0);
1965         break;
1966     case OP_SAR:
1967         gen_shift_rm_T1(s1, ot, d, 1, 1);
1968         break;
1969     case OP_RCL:
1970         gen_rotc_rm_T1(s1, ot, d, 0);
1971         break;
1972     case OP_RCR:
1973         gen_rotc_rm_T1(s1, ot, d, 1);
1974         break;
1975     }
1976 }
1977 
1978 static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
1979 {
1980     switch(op) {
1981     case OP_ROL:
1982         gen_rot_rm_im(s1, ot, d, c, 0);
1983         break;
1984     case OP_ROR:
1985         gen_rot_rm_im(s1, ot, d, c, 1);
1986         break;
1987     case OP_SHL:
1988     case OP_SHL1:
1989         gen_shift_rm_im(s1, ot, d, c, 0, 0);
1990         break;
1991     case OP_SHR:
1992         gen_shift_rm_im(s1, ot, d, c, 1, 0);
1993         break;
1994     case OP_SAR:
1995         gen_shift_rm_im(s1, ot, d, c, 1, 1);
1996         break;
1997     default:
1998         /* currently not optimized */
1999         tcg_gen_movi_tl(s1->T1, c);
2000         gen_shift(s1, op, ot, d, OR_TMP1);
2001         break;
2002     }
2003 }
2004 
2005 #define X86_MAX_INSN_LENGTH 15
2006 
2007 static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
2008 {
2009     uint64_t pc = s->pc;
2010 
2011     s->pc += num_bytes;
2012     if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
2013         /* If the instruction's 16th byte is on a different page than the 1st, a
2014          * page fault on the second page wins over the general protection fault
2015          * caused by the instruction being too long.
2016          * This can happen even if the operand is only one byte long!
2017          */
2018         if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
2019             volatile uint8_t unused =
2020                 cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
2021             (void) unused;
2022         }
2023         siglongjmp(s->jmpbuf, 1);
2024     }
2025 
2026     return pc;
2027 }
2028 
2029 static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
2030 {
2031     return translator_ldub(env, advance_pc(env, s, 1));
2032 }
2033 
2034 static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
2035 {
2036     return translator_ldsw(env, advance_pc(env, s, 2));
2037 }
2038 
2039 static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
2040 {
2041     return translator_lduw(env, advance_pc(env, s, 2));
2042 }
2043 
2044 static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
2045 {
2046     return translator_ldl(env, advance_pc(env, s, 4));
2047 }
2048 
#ifdef TARGET_X86_64
/* Fetch the next 8 instruction bytes as a quadword; s->pc advances by 8.  */
static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
{
    uint64_t addr = advance_pc(env, s, 8);

    return translator_ldq(env, addr);
}
#endif
2055 
2056 /* Decompose an address.  */
2057 
2058 typedef struct AddressParts {
2059     int def_seg;
2060     int base;
2061     int index;
2062     int scale;
2063     target_long disp;
2064 } AddressParts;
2065 
2066 static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
2067                                     int modrm)
2068 {
2069     int def_seg, base, index, scale, mod, rm;
2070     target_long disp;
2071     bool havesib;
2072 
2073     def_seg = R_DS;
2074     index = -1;
2075     scale = 0;
2076     disp = 0;
2077 
2078     mod = (modrm >> 6) & 3;
2079     rm = modrm & 7;
2080     base = rm | REX_B(s);
2081 
2082     if (mod == 3) {
2083         /* Normally filtered out earlier, but including this path
2084            simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
2085         goto done;
2086     }
2087 
2088     switch (s->aflag) {
2089     case MO_64:
2090     case MO_32:
2091         havesib = 0;
2092         if (rm == 4) {
2093             int code = x86_ldub_code(env, s);
2094             scale = (code >> 6) & 3;
2095             index = ((code >> 3) & 7) | REX_X(s);
2096             if (index == 4) {
2097                 index = -1;  /* no index */
2098             }
2099             base = (code & 7) | REX_B(s);
2100             havesib = 1;
2101         }
2102 
2103         switch (mod) {
2104         case 0:
2105             if ((base & 7) == 5) {
2106                 base = -1;
2107                 disp = (int32_t)x86_ldl_code(env, s);
2108                 if (CODE64(s) && !havesib) {
2109                     base = -2;
2110                     disp += s->pc + s->rip_offset;
2111                 }
2112             }
2113             break;
2114         case 1:
2115             disp = (int8_t)x86_ldub_code(env, s);
2116             break;
2117         default:
2118         case 2:
2119             disp = (int32_t)x86_ldl_code(env, s);
2120             break;
2121         }
2122 
2123         /* For correct popl handling with esp.  */
2124         if (base == R_ESP && s->popl_esp_hack) {
2125             disp += s->popl_esp_hack;
2126         }
2127         if (base == R_EBP || base == R_ESP) {
2128             def_seg = R_SS;
2129         }
2130         break;
2131 
2132     case MO_16:
2133         if (mod == 0) {
2134             if (rm == 6) {
2135                 base = -1;
2136                 disp = x86_lduw_code(env, s);
2137                 break;
2138             }
2139         } else if (mod == 1) {
2140             disp = (int8_t)x86_ldub_code(env, s);
2141         } else {
2142             disp = (int16_t)x86_lduw_code(env, s);
2143         }
2144 
2145         switch (rm) {
2146         case 0:
2147             base = R_EBX;
2148             index = R_ESI;
2149             break;
2150         case 1:
2151             base = R_EBX;
2152             index = R_EDI;
2153             break;
2154         case 2:
2155             base = R_EBP;
2156             index = R_ESI;
2157             def_seg = R_SS;
2158             break;
2159         case 3:
2160             base = R_EBP;
2161             index = R_EDI;
2162             def_seg = R_SS;
2163             break;
2164         case 4:
2165             base = R_ESI;
2166             break;
2167         case 5:
2168             base = R_EDI;
2169             break;
2170         case 6:
2171             base = R_EBP;
2172             def_seg = R_SS;
2173             break;
2174         default:
2175         case 7:
2176             base = R_EBX;
2177             break;
2178         }
2179         break;
2180 
2181     default:
2182         tcg_abort();
2183     }
2184 
2185  done:
2186     return (AddressParts){ def_seg, base, index, scale, disp };
2187 }
2188 
2189 /* Compute the address, with a minimum number of TCG ops.  */
2190 static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2191 {
2192     TCGv ea = NULL;
2193 
2194     if (a.index >= 0) {
2195         if (a.scale == 0) {
2196             ea = cpu_regs[a.index];
2197         } else {
2198             tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2199             ea = s->A0;
2200         }
2201         if (a.base >= 0) {
2202             tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2203             ea = s->A0;
2204         }
2205     } else if (a.base >= 0) {
2206         ea = cpu_regs[a.base];
2207     }
2208     if (!ea) {
2209         tcg_gen_movi_tl(s->A0, a.disp);
2210         ea = s->A0;
2211     } else if (a.disp != 0) {
2212         tcg_gen_addi_tl(s->A0, ea, a.disp);
2213         ea = s->A0;
2214     }
2215 
2216     return ea;
2217 }
2218 
2219 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2220 {
2221     AddressParts a = gen_lea_modrm_0(env, s, modrm);
2222     TCGv ea = gen_lea_modrm_1(s, a);
2223     gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2224 }
2225 
2226 static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2227 {
2228     (void)gen_lea_modrm_0(env, s, modrm);
2229 }
2230 
2231 /* Used for BNDCL, BNDCU, BNDCN.  */
2232 static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2233                       TCGCond cond, TCGv_i64 bndv)
2234 {
2235     TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
2236 
2237     tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
2238     if (!CODE64(s)) {
2239         tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
2240     }
2241     tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
2242     tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
2243     gen_helper_bndck(cpu_env, s->tmp2_i32);
2244 }
2245 
2246 /* used for LEA and MOV AX, mem */
2247 static void gen_add_A0_ds_seg(DisasContext *s)
2248 {
2249     gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
2250 }
2251 
2252 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2253    OR_TMP0 */
2254 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2255                            MemOp ot, int reg, int is_store)
2256 {
2257     int mod, rm;
2258 
2259     mod = (modrm >> 6) & 3;
2260     rm = (modrm & 7) | REX_B(s);
2261     if (mod == 3) {
2262         if (is_store) {
2263             if (reg != OR_TMP0)
2264                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2265             gen_op_mov_reg_v(s, ot, rm, s->T0);
2266         } else {
2267             gen_op_mov_v_reg(s, ot, s->T0, rm);
2268             if (reg != OR_TMP0)
2269                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2270         }
2271     } else {
2272         gen_lea_modrm(env, s, modrm);
2273         if (is_store) {
2274             if (reg != OR_TMP0)
2275                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2276             gen_op_st_v(s, ot, s->T0, s->A0);
2277         } else {
2278             gen_op_ld_v(s, ot, s->T0, s->A0);
2279             if (reg != OR_TMP0)
2280                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2281         }
2282     }
2283 }
2284 
2285 static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot)
2286 {
2287     uint32_t ret;
2288 
2289     switch (ot) {
2290     case MO_8:
2291         ret = x86_ldub_code(env, s);
2292         break;
2293     case MO_16:
2294         ret = x86_lduw_code(env, s);
2295         break;
2296     case MO_32:
2297 #ifdef TARGET_X86_64
2298     case MO_64:
2299 #endif
2300         ret = x86_ldl_code(env, s);
2301         break;
2302     default:
2303         tcg_abort();
2304     }
2305     return ret;
2306 }
2307 
2308 static inline int insn_const_size(MemOp ot)
2309 {
2310     if (ot <= MO_32) {
2311         return 1 << ot;
2312     } else {
2313         return 4;
2314     }
2315 }
2316 
2317 static void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2318 {
2319     target_ulong pc = s->cs_base + eip;
2320 
2321     if (translator_use_goto_tb(&s->base, pc))  {
2322         /* jump to same page: we can use a direct jump */
2323         tcg_gen_goto_tb(tb_num);
2324         gen_jmp_im(s, eip);
2325         tcg_gen_exit_tb(s->base.tb, tb_num);
2326         s->base.is_jmp = DISAS_NORETURN;
2327     } else {
2328         /* jump to another page */
2329         gen_jmp_im(s, eip);
2330         gen_jr(s, s->tmp0);
2331     }
2332 }
2333 
2334 static inline void gen_jcc(DisasContext *s, int b,
2335                            target_ulong val, target_ulong next_eip)
2336 {
2337     TCGLabel *l1, *l2;
2338 
2339     if (s->jmp_opt) {
2340         l1 = gen_new_label();
2341         gen_jcc1(s, b, l1);
2342 
2343         gen_goto_tb(s, 0, next_eip);
2344 
2345         gen_set_label(l1);
2346         gen_goto_tb(s, 1, val);
2347     } else {
2348         l1 = gen_new_label();
2349         l2 = gen_new_label();
2350         gen_jcc1(s, b, l1);
2351 
2352         gen_jmp_im(s, next_eip);
2353         tcg_gen_br(l2);
2354 
2355         gen_set_label(l1);
2356         gen_jmp_im(s, val);
2357         gen_set_label(l2);
2358         gen_eob(s);
2359     }
2360 }
2361 
2362 static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b,
2363                         int modrm, int reg)
2364 {
2365     CCPrepare cc;
2366 
2367     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2368 
2369     cc = gen_prepare_cc(s, b, s->T1);
2370     if (cc.mask != -1) {
2371         TCGv t0 = tcg_temp_new();
2372         tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2373         cc.reg = t0;
2374     }
2375     if (!cc.use_reg2) {
2376         cc.reg2 = tcg_const_tl(cc.imm);
2377     }
2378 
2379     tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
2380                        s->T0, cpu_regs[reg]);
2381     gen_op_mov_reg_v(s, ot, reg, s->T0);
2382 
2383     if (cc.mask != -1) {
2384         tcg_temp_free(cc.reg);
2385     }
2386     if (!cc.use_reg2) {
2387         tcg_temp_free(cc.reg2);
2388     }
2389 }
2390 
2391 static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg)
2392 {
2393     tcg_gen_ld32u_tl(s->T0, cpu_env,
2394                      offsetof(CPUX86State,segs[seg_reg].selector));
2395 }
2396 
2397 static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg)
2398 {
2399     tcg_gen_ext16u_tl(s->T0, s->T0);
2400     tcg_gen_st32_tl(s->T0, cpu_env,
2401                     offsetof(CPUX86State,segs[seg_reg].selector));
2402     tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
2403 }
2404 
2405 /* move T0 to seg_reg and compute if the CPU state may change. Never
2406    call this function with seg_reg == R_CS */
2407 static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
2408 {
2409     if (PE(s) && !VM86(s)) {
2410         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
2411         gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
2412         /* abort translation because the addseg value may change or
2413            because ss32 may change. For R_SS, translation must always
2414            stop as a special handling must be done to disable hardware
2415            interrupts for the next instruction */
2416         if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) {
2417             s->base.is_jmp = DISAS_TOO_MANY;
2418         }
2419     } else {
2420         gen_op_movl_seg_T0_vm(s, seg_reg);
2421         if (seg_reg == R_SS) {
2422             s->base.is_jmp = DISAS_TOO_MANY;
2423         }
2424     }
2425 }
2426 
2427 static void gen_svm_check_intercept(DisasContext *s, uint32_t type)
2428 {
2429     /* no SVM activated; fast case */
2430     if (likely(!GUEST(s))) {
2431         return;
2432     }
2433     gen_helper_svm_check_intercept(cpu_env, tcg_constant_i32(type));
2434 }
2435 
2436 static inline void gen_stack_update(DisasContext *s, int addend)
2437 {
2438     gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
2439 }
2440 
2441 /* Generate a push. It depends on ss32, addseg and dflag.  */
2442 static void gen_push_v(DisasContext *s, TCGv val)
2443 {
2444     MemOp d_ot = mo_pushpop(s, s->dflag);
2445     MemOp a_ot = mo_stacksize(s);
2446     int size = 1 << d_ot;
2447     TCGv new_esp = s->A0;
2448 
2449     tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
2450 
2451     if (!CODE64(s)) {
2452         if (ADDSEG(s)) {
2453             new_esp = s->tmp4;
2454             tcg_gen_mov_tl(new_esp, s->A0);
2455         }
2456         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2457     }
2458 
2459     gen_op_st_v(s, d_ot, val, s->A0);
2460     gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
2461 }
2462 
2463 /* two step pop is necessary for precise exceptions */
2464 static MemOp gen_pop_T0(DisasContext *s)
2465 {
2466     MemOp d_ot = mo_pushpop(s, s->dflag);
2467 
2468     gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2469     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2470 
2471     return d_ot;
2472 }
2473 
2474 static inline void gen_pop_update(DisasContext *s, MemOp ot)
2475 {
2476     gen_stack_update(s, 1 << ot);
2477 }
2478 
2479 static inline void gen_stack_A0(DisasContext *s)
2480 {
2481     gen_lea_v_seg(s, SS32(s) ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2482 }
2483 
2484 static void gen_pusha(DisasContext *s)
2485 {
2486     MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2487     MemOp d_ot = s->dflag;
2488     int size = 1 << d_ot;
2489     int i;
2490 
2491     for (i = 0; i < 8; i++) {
2492         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2493         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2494         gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2495     }
2496 
2497     gen_stack_update(s, -8 * size);
2498 }
2499 
2500 static void gen_popa(DisasContext *s)
2501 {
2502     MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2503     MemOp d_ot = s->dflag;
2504     int size = 1 << d_ot;
2505     int i;
2506 
2507     for (i = 0; i < 8; i++) {
2508         /* ESP is not reloaded */
2509         if (7 - i == R_ESP) {
2510             continue;
2511         }
2512         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2513         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2514         gen_op_ld_v(s, d_ot, s->T0, s->A0);
2515         gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2516     }
2517 
2518     gen_stack_update(s, 8 * size);
2519 }
2520 
2521 static void gen_enter(DisasContext *s, int esp_addend, int level)
2522 {
2523     MemOp d_ot = mo_pushpop(s, s->dflag);
2524     MemOp a_ot = CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
2525     int size = 1 << d_ot;
2526 
2527     /* Push BP; compute FrameTemp into T1.  */
2528     tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
2529     gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
2530     gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
2531 
2532     level &= 31;
2533     if (level != 0) {
2534         int i;
2535 
2536         /* Copy level-1 pointers from the previous frame.  */
2537         for (i = 1; i < level; ++i) {
2538             tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
2539             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2540             gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
2541 
2542             tcg_gen_subi_tl(s->A0, s->T1, size * i);
2543             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2544             gen_op_st_v(s, d_ot, s->tmp0, s->A0);
2545         }
2546 
2547         /* Push the current FrameTemp as the last level.  */
2548         tcg_gen_subi_tl(s->A0, s->T1, size * level);
2549         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2550         gen_op_st_v(s, d_ot, s->T1, s->A0);
2551     }
2552 
2553     /* Copy the FrameTemp value to EBP.  */
2554     gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
2555 
2556     /* Compute the final value of ESP.  */
2557     tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
2558     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2559 }
2560 
2561 static void gen_leave(DisasContext *s)
2562 {
2563     MemOp d_ot = mo_pushpop(s, s->dflag);
2564     MemOp a_ot = mo_stacksize(s);
2565 
2566     gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2567     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2568 
2569     tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
2570 
2571     gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
2572     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2573 }
2574 
2575 /* Similarly, except that the assumption here is that we don't decode
2576    the instruction at all -- either a missing opcode, an unimplemented
2577    feature, or just a bogus instruction stream.  */
2578 static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2579 {
2580     gen_illegal_opcode(s);
2581 
2582     if (qemu_loglevel_mask(LOG_UNIMP)) {
2583         FILE *logfile = qemu_log_lock();
2584         target_ulong pc = s->pc_start, end = s->pc;
2585 
2586         qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2587         for (; pc < end; ++pc) {
2588             qemu_log(" %02x", cpu_ldub_code(env, pc));
2589         }
2590         qemu_log("\n");
2591         qemu_log_unlock(logfile);
2592     }
2593 }
2594 
2595 /* an interrupt is different from an exception because of the
2596    privilege checks */
2597 static void gen_interrupt(DisasContext *s, int intno,
2598                           target_ulong cur_eip, target_ulong next_eip)
2599 {
2600     gen_update_cc_op(s);
2601     gen_jmp_im(s, cur_eip);
2602     gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2603                                tcg_const_i32(next_eip - cur_eip));
2604     s->base.is_jmp = DISAS_NORETURN;
2605 }
2606 
2607 static void gen_set_hflag(DisasContext *s, uint32_t mask)
2608 {
2609     if ((s->flags & mask) == 0) {
2610         TCGv_i32 t = tcg_temp_new_i32();
2611         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2612         tcg_gen_ori_i32(t, t, mask);
2613         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2614         tcg_temp_free_i32(t);
2615         s->flags |= mask;
2616     }
2617 }
2618 
2619 static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2620 {
2621     if (s->flags & mask) {
2622         TCGv_i32 t = tcg_temp_new_i32();
2623         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2624         tcg_gen_andi_i32(t, t, ~mask);
2625         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2626         tcg_temp_free_i32(t);
2627         s->flags &= ~mask;
2628     }
2629 }
2630 
2631 /* Clear BND registers during legacy branches.  */
2632 static void gen_bnd_jmp(DisasContext *s)
2633 {
2634     /* Clear the registers only if BND prefix is missing, MPX is enabled,
2635        and if the BNDREGs are known to be in use (non-zero) already.
2636        The helper itself will check BNDPRESERVE at runtime.  */
2637     if ((s->prefix & PREFIX_REPNZ) == 0
2638         && (s->flags & HF_MPX_EN_MASK) != 0
2639         && (s->flags & HF_MPX_IU_MASK) != 0) {
2640         gen_helper_bnd_jmp(cpu_env);
2641     }
2642 }
2643 
2644 /* Generate an end of block. Trace exception is also generated if needed.
2645    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2646    If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2647    S->TF.  This is used by the syscall/sysret insns.  */
2648 static void
2649 do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2650 {
2651     gen_update_cc_op(s);
2652 
2653     /* If several instructions disable interrupts, only the first does it.  */
2654     if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2655         gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2656     } else {
2657         gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2658     }
2659 
2660     if (s->base.tb->flags & HF_RF_MASK) {
2661         gen_helper_reset_rf(cpu_env);
2662     }
2663     if (s->base.singlestep_enabled) {
2664         gen_helper_debug(cpu_env);
2665     } else if (recheck_tf) {
2666         gen_helper_rechecking_single_step(cpu_env);
2667         tcg_gen_exit_tb(NULL, 0);
2668     } else if (s->flags & HF_TF_MASK) {
2669         gen_helper_single_step(cpu_env);
2670     } else if (jr) {
2671         tcg_gen_lookup_and_goto_ptr();
2672     } else {
2673         tcg_gen_exit_tb(NULL, 0);
2674     }
2675     s->base.is_jmp = DISAS_NORETURN;
2676 }
2677 
2678 static inline void
2679 gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2680 {
2681     do_gen_eob_worker(s, inhibit, recheck_tf, false);
2682 }
2683 
2684 /* End of block.
2685    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2686 static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2687 {
2688     gen_eob_worker(s, inhibit, false);
2689 }
2690 
2691 /* End of block, resetting the inhibit irq flag.  */
2692 static void gen_eob(DisasContext *s)
2693 {
2694     gen_eob_worker(s, false, false);
2695 }
2696 
2697 /* Jump to register */
2698 static void gen_jr(DisasContext *s, TCGv dest)
2699 {
2700     do_gen_eob_worker(s, false, false, true);
2701 }
2702 
2703 /* generate a jump to eip. No segment change must happen before as a
2704    direct call to the next block may occur */
2705 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2706 {
2707     gen_update_cc_op(s);
2708     set_cc_op(s, CC_OP_DYNAMIC);
2709     if (s->jmp_opt) {
2710         gen_goto_tb(s, tb_num, eip);
2711     } else {
2712         gen_jmp_im(s, eip);
2713         gen_eob(s);
2714     }
2715 }
2716 
2717 static void gen_jmp(DisasContext *s, target_ulong eip)
2718 {
2719     gen_jmp_tb(s, eip, 0);
2720 }
2721 
2722 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2723 {
2724     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2725     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
2726 }
2727 
2728 static inline void gen_stq_env_A0(DisasContext *s, int offset)
2729 {
2730     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
2731     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2732 }
2733 
2734 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2735 {
2736     int mem_index = s->mem_index;
2737     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2738     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2739     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2740     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2741     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2742 }
2743 
2744 static inline void gen_sto_env_A0(DisasContext *s, int offset)
2745 {
2746     int mem_index = s->mem_index;
2747     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2748     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2749     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2750     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2751     tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2752 }
2753 
2754 static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
2755 {
2756     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2757     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2758     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2759     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2760 }
2761 
2762 static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
2763 {
2764     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
2765     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2766 }
2767 
2768 static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
2769 {
2770     tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
2771     tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
2772 }
2773 
2774 static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
2775 {
2776     tcg_gen_movi_i64(s->tmp1_i64, 0);
2777     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2778 }
2779 
2780 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2781 typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2782 typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2783 typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2784 typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2785 typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2786                                TCGv_i32 val);
2787 typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2788 typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2789                                TCGv val);
2790 
2791 #define SSE_SPECIAL ((void *)1)
2792 #define SSE_DUMMY ((void *)2)
2793 
2794 #define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2795 #define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2796                      gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2797 
2798 static const SSEFunc_0_epp sse_op_table1[256][4] = {
2799     /* 3DNow! extensions */
2800     [0x0e] = { SSE_DUMMY }, /* femms */
2801     [0x0f] = { SSE_DUMMY }, /* pf... */
2802     /* pure SSE operations */
2803     [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2804     [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2805     [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2806     [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2807     [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2808     [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2809     [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2810     [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2811 
2812     [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2813     [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2814     [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2815     [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2816     [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2817     [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2818     [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2819     [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2820     [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2821     [0x51] = SSE_FOP(sqrt),
2822     [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2823     [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2824     [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2825     [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2826     [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2827     [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2828     [0x58] = SSE_FOP(add),
2829     [0x59] = SSE_FOP(mul),
2830     [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2831                gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2832     [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2833     [0x5c] = SSE_FOP(sub),
2834     [0x5d] = SSE_FOP(min),
2835     [0x5e] = SSE_FOP(div),
2836     [0x5f] = SSE_FOP(max),
2837 
2838     [0xc2] = SSE_FOP(cmpeq),
2839     [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2840                (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2841 
2842     /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2843     [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2844     [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2845 
2846     /* MMX ops and their SSE extensions */
2847     [0x60] = MMX_OP2(punpcklbw),
2848     [0x61] = MMX_OP2(punpcklwd),
2849     [0x62] = MMX_OP2(punpckldq),
2850     [0x63] = MMX_OP2(packsswb),
2851     [0x64] = MMX_OP2(pcmpgtb),
2852     [0x65] = MMX_OP2(pcmpgtw),
2853     [0x66] = MMX_OP2(pcmpgtl),
2854     [0x67] = MMX_OP2(packuswb),
2855     [0x68] = MMX_OP2(punpckhbw),
2856     [0x69] = MMX_OP2(punpckhwd),
2857     [0x6a] = MMX_OP2(punpckhdq),
2858     [0x6b] = MMX_OP2(packssdw),
2859     [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2860     [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2861     [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2862     [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, , movqdu */
2863     [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2864                (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2865                (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2866                (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2867     [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2868     [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2869     [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2870     [0x74] = MMX_OP2(pcmpeqb),
2871     [0x75] = MMX_OP2(pcmpeqw),
2872     [0x76] = MMX_OP2(pcmpeql),
2873     [0x77] = { SSE_DUMMY }, /* emms */
2874     [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2875     [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2876     [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2877     [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2878     [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */
2879     [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2880     [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2881     [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2882     [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2883     [0xd1] = MMX_OP2(psrlw),
2884     [0xd2] = MMX_OP2(psrld),
2885     [0xd3] = MMX_OP2(psrlq),
2886     [0xd4] = MMX_OP2(paddq),
2887     [0xd5] = MMX_OP2(pmullw),
2888     [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2889     [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2890     [0xd8] = MMX_OP2(psubusb),
2891     [0xd9] = MMX_OP2(psubusw),
2892     [0xda] = MMX_OP2(pminub),
2893     [0xdb] = MMX_OP2(pand),
2894     [0xdc] = MMX_OP2(paddusb),
2895     [0xdd] = MMX_OP2(paddusw),
2896     [0xde] = MMX_OP2(pmaxub),
2897     [0xdf] = MMX_OP2(pandn),
2898     [0xe0] = MMX_OP2(pavgb),
2899     [0xe1] = MMX_OP2(psraw),
2900     [0xe2] = MMX_OP2(psrad),
2901     [0xe3] = MMX_OP2(pavgw),
2902     [0xe4] = MMX_OP2(pmulhuw),
2903     [0xe5] = MMX_OP2(pmulhw),
2904     [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2905     [0xe7] = { SSE_SPECIAL , SSE_SPECIAL },  /* movntq, movntq */
2906     [0xe8] = MMX_OP2(psubsb),
2907     [0xe9] = MMX_OP2(psubsw),
2908     [0xea] = MMX_OP2(pminsw),
2909     [0xeb] = MMX_OP2(por),
2910     [0xec] = MMX_OP2(paddsb),
2911     [0xed] = MMX_OP2(paddsw),
2912     [0xee] = MMX_OP2(pmaxsw),
2913     [0xef] = MMX_OP2(pxor),
2914     [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2915     [0xf1] = MMX_OP2(psllw),
2916     [0xf2] = MMX_OP2(pslld),
2917     [0xf3] = MMX_OP2(psllq),
2918     [0xf4] = MMX_OP2(pmuludq),
2919     [0xf5] = MMX_OP2(pmaddwd),
2920     [0xf6] = MMX_OP2(psadbw),
2921     [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2922                (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2923     [0xf8] = MMX_OP2(psubb),
2924     [0xf9] = MMX_OP2(psubw),
2925     [0xfa] = MMX_OP2(psubl),
2926     [0xfb] = MMX_OP2(psubq),
2927     [0xfc] = MMX_OP2(paddb),
2928     [0xfd] = MMX_OP2(paddw),
2929     [0xfe] = MMX_OP2(paddl),
2930 };
2931 
2932 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2933     [0 + 2] = MMX_OP2(psrlw),
2934     [0 + 4] = MMX_OP2(psraw),
2935     [0 + 6] = MMX_OP2(psllw),
2936     [8 + 2] = MMX_OP2(psrld),
2937     [8 + 4] = MMX_OP2(psrad),
2938     [8 + 6] = MMX_OP2(pslld),
2939     [16 + 2] = MMX_OP2(psrlq),
2940     [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2941     [16 + 6] = MMX_OP2(psllq),
2942     [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2943 };
2944 
2945 static const SSEFunc_0_epi sse_op_table3ai[] = {
2946     gen_helper_cvtsi2ss,
2947     gen_helper_cvtsi2sd
2948 };
2949 
#ifdef TARGET_X86_64
/* 64-bit integer to scalar float conversions: [0] single, [1] double. */
static const SSEFunc_0_epl sse_op_table3aq[] = {
    [0] = gen_helper_cvtsq2ss,
    [1] = gen_helper_cvtsq2sd,
};
#endif
2956 
2957 static const SSEFunc_i_ep sse_op_table3bi[] = {
2958     gen_helper_cvttss2si,
2959     gen_helper_cvtss2si,
2960     gen_helper_cvttsd2si,
2961     gen_helper_cvtsd2si
2962 };
2963 
#ifdef TARGET_X86_64
/*
 * Scalar float to 64-bit integer conversions, laid out exactly like
 * sse_op_table3bi: even slots truncate, slots 0-1 are single precision
 * and slots 2-3 double precision.
 */
static const SSEFunc_l_ep sse_op_table3bq[] = {
    [0] = gen_helper_cvttss2sq,
    [1] = gen_helper_cvtss2sq,
    [2] = gen_helper_cvttsd2sq,
    [3] = gen_helper_cvtsd2sq,
};
#endif

2972 
2973 static const SSEFunc_0_epp sse_op_table4[8][4] = {
2974     SSE_FOP(cmpeq),
2975     SSE_FOP(cmplt),
2976     SSE_FOP(cmple),
2977     SSE_FOP(cmpunord),
2978     SSE_FOP(cmpneq),
2979     SSE_FOP(cmpnlt),
2980     SSE_FOP(cmpnle),
2981     SSE_FOP(cmpord),
2982 };
2983 
2984 static const SSEFunc_0_epp sse_op_table5[256] = {
2985     [0x0c] = gen_helper_pi2fw,
2986     [0x0d] = gen_helper_pi2fd,
2987     [0x1c] = gen_helper_pf2iw,
2988     [0x1d] = gen_helper_pf2id,
2989     [0x8a] = gen_helper_pfnacc,
2990     [0x8e] = gen_helper_pfpnacc,
2991     [0x90] = gen_helper_pfcmpge,
2992     [0x94] = gen_helper_pfmin,
2993     [0x96] = gen_helper_pfrcp,
2994     [0x97] = gen_helper_pfrsqrt,
2995     [0x9a] = gen_helper_pfsub,
2996     [0x9e] = gen_helper_pfadd,
2997     [0xa0] = gen_helper_pfcmpgt,
2998     [0xa4] = gen_helper_pfmax,
2999     [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
3000     [0xa7] = gen_helper_movq, /* pfrsqit1 */
3001     [0xaa] = gen_helper_pfsubr,
3002     [0xae] = gen_helper_pfacc,
3003     [0xb0] = gen_helper_pfcmpeq,
3004     [0xb4] = gen_helper_pfmul,
3005     [0xb6] = gen_helper_movq, /* pfrcpit2 */
3006     [0xb7] = gen_helper_pmulhrw_mmx,
3007     [0xbb] = gen_helper_pswapd,
3008     [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
3009 };
3010 
3011 struct SSEOpHelper_epp {
3012     SSEFunc_0_epp op[2];
3013     uint32_t ext_mask;
3014 };
3015 
3016 struct SSEOpHelper_eppi {
3017     SSEFunc_0_eppi op[2];
3018     uint32_t ext_mask;
3019 };
3020 
3021 #define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
3022 #define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
3023 #define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
3024 #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
3025 #define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
3026         CPUID_EXT_PCLMULQDQ }
3027 #define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
3028 
3029 static const struct SSEOpHelper_epp sse_op_table6[256] = {
3030     [0x00] = SSSE3_OP(pshufb),
3031     [0x01] = SSSE3_OP(phaddw),
3032     [0x02] = SSSE3_OP(phaddd),
3033     [0x03] = SSSE3_OP(phaddsw),
3034     [0x04] = SSSE3_OP(pmaddubsw),
3035     [0x05] = SSSE3_OP(phsubw),
3036     [0x06] = SSSE3_OP(phsubd),
3037     [0x07] = SSSE3_OP(phsubsw),
3038     [0x08] = SSSE3_OP(psignb),
3039     [0x09] = SSSE3_OP(psignw),
3040     [0x0a] = SSSE3_OP(psignd),
3041     [0x0b] = SSSE3_OP(pmulhrsw),
3042     [0x10] = SSE41_OP(pblendvb),
3043     [0x14] = SSE41_OP(blendvps),
3044     [0x15] = SSE41_OP(blendvpd),
3045     [0x17] = SSE41_OP(ptest),
3046     [0x1c] = SSSE3_OP(pabsb),
3047     [0x1d] = SSSE3_OP(pabsw),
3048     [0x1e] = SSSE3_OP(pabsd),
3049     [0x20] = SSE41_OP(pmovsxbw),
3050     [0x21] = SSE41_OP(pmovsxbd),
3051     [0x22] = SSE41_OP(pmovsxbq),
3052     [0x23] = SSE41_OP(pmovsxwd),
3053     [0x24] = SSE41_OP(pmovsxwq),
3054     [0x25] = SSE41_OP(pmovsxdq),
3055     [0x28] = SSE41_OP(pmuldq),
3056     [0x29] = SSE41_OP(pcmpeqq),
3057     [0x2a] = SSE41_SPECIAL, /* movntqda */
3058     [0x2b] = SSE41_OP(packusdw),
3059     [0x30] = SSE41_OP(pmovzxbw),
3060     [0x31] = SSE41_OP(pmovzxbd),
3061     [0x32] = SSE41_OP(pmovzxbq),
3062     [0x33] = SSE41_OP(pmovzxwd),
3063     [0x34] = SSE41_OP(pmovzxwq),
3064     [0x35] = SSE41_OP(pmovzxdq),
3065     [0x37] = SSE42_OP(pcmpgtq),
3066     [0x38] = SSE41_OP(pminsb),
3067     [0x39] = SSE41_OP(pminsd),
3068     [0x3a] = SSE41_OP(pminuw),
3069     [0x3b] = SSE41_OP(pminud),
3070     [0x3c] = SSE41_OP(pmaxsb),
3071     [0x3d] = SSE41_OP(pmaxsd),
3072     [0x3e] = SSE41_OP(pmaxuw),
3073     [0x3f] = SSE41_OP(pmaxud),
3074     [0x40] = SSE41_OP(pmulld),
3075     [0x41] = SSE41_OP(phminposuw),
3076     [0xdb] = AESNI_OP(aesimc),
3077     [0xdc] = AESNI_OP(aesenc),
3078     [0xdd] = AESNI_OP(aesenclast),
3079     [0xde] = AESNI_OP(aesdec),
3080     [0xdf] = AESNI_OP(aesdeclast),
3081 };
3082 
3083 static const struct SSEOpHelper_eppi sse_op_table7[256] = {
3084     [0x08] = SSE41_OP(roundps),
3085     [0x09] = SSE41_OP(roundpd),
3086     [0x0a] = SSE41_OP(roundss),
3087     [0x0b] = SSE41_OP(roundsd),
3088     [0x0c] = SSE41_OP(blendps),
3089     [0x0d] = SSE41_OP(blendpd),
3090     [0x0e] = SSE41_OP(pblendw),
3091     [0x0f] = SSSE3_OP(palignr),
3092     [0x14] = SSE41_SPECIAL, /* pextrb */
3093     [0x15] = SSE41_SPECIAL, /* pextrw */
3094     [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
3095     [0x17] = SSE41_SPECIAL, /* extractps */
3096     [0x20] = SSE41_SPECIAL, /* pinsrb */
3097     [0x21] = SSE41_SPECIAL, /* insertps */
3098     [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
3099     [0x40] = SSE41_OP(dpps),
3100     [0x41] = SSE41_OP(dppd),
3101     [0x42] = SSE41_OP(mpsadbw),
3102     [0x44] = PCLMULQDQ_OP(pclmulqdq),
3103     [0x60] = SSE42_OP(pcmpestrm),
3104     [0x61] = SSE42_OP(pcmpestri),
3105     [0x62] = SSE42_OP(pcmpistrm),
3106     [0x63] = SSE42_OP(pcmpistri),
3107     [0xdf] = AESNI_OP(aeskeygenassist),
3108 };
3109 
3110 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3111                     target_ulong pc_start)
3112 {
3113     int b1, op1_offset, op2_offset, is_xmm, val;
3114     int modrm, mod, rm, reg;
3115     SSEFunc_0_epp sse_fn_epp;
3116     SSEFunc_0_eppi sse_fn_eppi;
3117     SSEFunc_0_ppi sse_fn_ppi;
3118     SSEFunc_0_eppt sse_fn_eppt;
3119     MemOp ot;
3120 
3121     b &= 0xff;
3122     if (s->prefix & PREFIX_DATA)
3123         b1 = 1;
3124     else if (s->prefix & PREFIX_REPZ)
3125         b1 = 2;
3126     else if (s->prefix & PREFIX_REPNZ)
3127         b1 = 3;
3128     else
3129         b1 = 0;
3130     sse_fn_epp = sse_op_table1[b][b1];
3131     if (!sse_fn_epp) {
3132         goto unknown_op;
3133     }
3134     if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3135         is_xmm = 1;
3136     } else {
3137         if (b1 == 0) {
3138             /* MMX case */
3139             is_xmm = 0;
3140         } else {
3141             is_xmm = 1;
3142         }
3143     }
3144     /* simple MMX/SSE operation */
3145     if (s->flags & HF_TS_MASK) {
3146         gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3147         return;
3148     }
3149     if (s->flags & HF_EM_MASK) {
3150     illegal_op:
3151         gen_illegal_opcode(s);
3152         return;
3153     }
3154     if (is_xmm
3155         && !(s->flags & HF_OSFXSR_MASK)
3156         && (b != 0x38 && b != 0x3a)) {
3157         goto unknown_op;
3158     }
3159     if (b == 0x0e) {
3160         if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3161             /* If we were fully decoding this we might use illegal_op.  */
3162             goto unknown_op;
3163         }
3164         /* femms */
3165         gen_helper_emms(cpu_env);
3166         return;
3167     }
3168     if (b == 0x77) {
3169         /* emms */
3170         gen_helper_emms(cpu_env);
3171         return;
3172     }
3173     /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3174        the static cpu state) */
3175     if (!is_xmm) {
3176         gen_helper_enter_mmx(cpu_env);
3177     }
3178 
3179     modrm = x86_ldub_code(env, s);
3180     reg = ((modrm >> 3) & 7);
3181     if (is_xmm) {
3182         reg |= REX_R(s);
3183     }
3184     mod = (modrm >> 6) & 3;
3185     if (sse_fn_epp == SSE_SPECIAL) {
3186         b |= (b1 << 8);
3187         switch(b) {
3188         case 0x0e7: /* movntq */
3189             if (mod == 3) {
3190                 goto illegal_op;
3191             }
3192             gen_lea_modrm(env, s, modrm);
3193             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3194             break;
3195         case 0x1e7: /* movntdq */
3196         case 0x02b: /* movntps */
3197         case 0x12b: /* movntps */
3198             if (mod == 3)
3199                 goto illegal_op;
3200             gen_lea_modrm(env, s, modrm);
3201             gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3202             break;
3203         case 0x3f0: /* lddqu */
3204             if (mod == 3)
3205                 goto illegal_op;
3206             gen_lea_modrm(env, s, modrm);
3207             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3208             break;
3209         case 0x22b: /* movntss */
3210         case 0x32b: /* movntsd */
3211             if (mod == 3)
3212                 goto illegal_op;
3213             gen_lea_modrm(env, s, modrm);
3214             if (b1 & 1) {
3215                 gen_stq_env_A0(s, offsetof(CPUX86State,
3216                                            xmm_regs[reg].ZMM_Q(0)));
3217             } else {
3218                 tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3219                     xmm_regs[reg].ZMM_L(0)));
3220                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3221             }
3222             break;
3223         case 0x6e: /* movd mm, ea */
3224 #ifdef TARGET_X86_64
3225             if (s->dflag == MO_64) {
3226                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3227                 tcg_gen_st_tl(s->T0, cpu_env,
3228                               offsetof(CPUX86State, fpregs[reg].mmx));
3229             } else
3230 #endif
3231             {
3232                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3233                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3234                                  offsetof(CPUX86State,fpregs[reg].mmx));
3235                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3236                 gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3237             }
3238             break;
3239         case 0x16e: /* movd xmm, ea */
3240 #ifdef TARGET_X86_64
3241             if (s->dflag == MO_64) {
3242                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3243                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3244                                  offsetof(CPUX86State,xmm_regs[reg]));
3245                 gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3246             } else
3247 #endif
3248             {
3249                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3250                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3251                                  offsetof(CPUX86State,xmm_regs[reg]));
3252                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3253                 gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3254             }
3255             break;
3256         case 0x6f: /* movq mm, ea */
3257             if (mod != 3) {
3258                 gen_lea_modrm(env, s, modrm);
3259                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3260             } else {
3261                 rm = (modrm & 7);
3262                 tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3263                                offsetof(CPUX86State,fpregs[rm].mmx));
3264                 tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3265                                offsetof(CPUX86State,fpregs[reg].mmx));
3266             }
3267             break;
3268         case 0x010: /* movups */
3269         case 0x110: /* movupd */
3270         case 0x028: /* movaps */
3271         case 0x128: /* movapd */
3272         case 0x16f: /* movdqa xmm, ea */
3273         case 0x26f: /* movdqu xmm, ea */
3274             if (mod != 3) {
3275                 gen_lea_modrm(env, s, modrm);
3276                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3277             } else {
3278                 rm = (modrm & 7) | REX_B(s);
3279                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
3280                             offsetof(CPUX86State,xmm_regs[rm]));
3281             }
3282             break;
3283         case 0x210: /* movss xmm, ea */
3284             if (mod != 3) {
3285                 gen_lea_modrm(env, s, modrm);
3286                 gen_op_ld_v(s, MO_32, s->T0, s->A0);
3287                 tcg_gen_st32_tl(s->T0, cpu_env,
3288                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3289                 tcg_gen_movi_tl(s->T0, 0);
3290                 tcg_gen_st32_tl(s->T0, cpu_env,
3291                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3292                 tcg_gen_st32_tl(s->T0, cpu_env,
3293                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3294                 tcg_gen_st32_tl(s->T0, cpu_env,
3295                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3296             } else {
3297                 rm = (modrm & 7) | REX_B(s);
3298                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3299                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3300             }
3301             break;
3302         case 0x310: /* movsd xmm, ea */
3303             if (mod != 3) {
3304                 gen_lea_modrm(env, s, modrm);
3305                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3306                                            xmm_regs[reg].ZMM_Q(0)));
3307                 tcg_gen_movi_tl(s->T0, 0);
3308                 tcg_gen_st32_tl(s->T0, cpu_env,
3309                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3310                 tcg_gen_st32_tl(s->T0, cpu_env,
3311                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3312             } else {
3313                 rm = (modrm & 7) | REX_B(s);
3314                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3315                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3316             }
3317             break;
3318         case 0x012: /* movlps */
3319         case 0x112: /* movlpd */
3320             if (mod != 3) {
3321                 gen_lea_modrm(env, s, modrm);
3322                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3323                                            xmm_regs[reg].ZMM_Q(0)));
3324             } else {
3325                 /* movhlps */
3326                 rm = (modrm & 7) | REX_B(s);
3327                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3328                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3329             }
3330             break;
3331         case 0x212: /* movsldup */
3332             if (mod != 3) {
3333                 gen_lea_modrm(env, s, modrm);
3334                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3335             } else {
3336                 rm = (modrm & 7) | REX_B(s);
3337                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3338                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3339                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3340                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3341             }
3342             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3343                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3344             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3345                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3346             break;
3347         case 0x312: /* movddup */
3348             if (mod != 3) {
3349                 gen_lea_modrm(env, s, modrm);
3350                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3351                                            xmm_regs[reg].ZMM_Q(0)));
3352             } else {
3353                 rm = (modrm & 7) | REX_B(s);
3354                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3355                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3356             }
3357             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3358                         offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3359             break;
3360         case 0x016: /* movhps */
3361         case 0x116: /* movhpd */
3362             if (mod != 3) {
3363                 gen_lea_modrm(env, s, modrm);
3364                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3365                                            xmm_regs[reg].ZMM_Q(1)));
3366             } else {
3367                 /* movlhps */
3368                 rm = (modrm & 7) | REX_B(s);
3369                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3370                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3371             }
3372             break;
3373         case 0x216: /* movshdup */
3374             if (mod != 3) {
3375                 gen_lea_modrm(env, s, modrm);
3376                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3377             } else {
3378                 rm = (modrm & 7) | REX_B(s);
3379                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3380                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3381                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3382                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3383             }
3384             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3385                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3386             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3387                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3388             break;
3389         case 0x178:
3390         case 0x378:
3391             {
3392                 int bit_index, field_length;
3393 
3394                 if (b1 == 1 && reg != 0)
3395                     goto illegal_op;
3396                 field_length = x86_ldub_code(env, s) & 0x3F;
3397                 bit_index = x86_ldub_code(env, s) & 0x3F;
3398                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3399                     offsetof(CPUX86State,xmm_regs[reg]));
3400                 if (b1 == 1)
3401                     gen_helper_extrq_i(cpu_env, s->ptr0,
3402                                        tcg_const_i32(bit_index),
3403                                        tcg_const_i32(field_length));
3404                 else
3405                     gen_helper_insertq_i(cpu_env, s->ptr0,
3406                                          tcg_const_i32(bit_index),
3407                                          tcg_const_i32(field_length));
3408             }
3409             break;
3410         case 0x7e: /* movd ea, mm */
3411 #ifdef TARGET_X86_64
3412             if (s->dflag == MO_64) {
3413                 tcg_gen_ld_i64(s->T0, cpu_env,
3414                                offsetof(CPUX86State,fpregs[reg].mmx));
3415                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3416             } else
3417 #endif
3418             {
3419                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3420                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3421                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3422             }
3423             break;
3424         case 0x17e: /* movd ea, xmm */
3425 #ifdef TARGET_X86_64
3426             if (s->dflag == MO_64) {
3427                 tcg_gen_ld_i64(s->T0, cpu_env,
3428                                offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3429                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3430             } else
3431 #endif
3432             {
3433                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3434                                  offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3435                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3436             }
3437             break;
3438         case 0x27e: /* movq xmm, ea */
3439             if (mod != 3) {
3440                 gen_lea_modrm(env, s, modrm);
3441                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3442                                            xmm_regs[reg].ZMM_Q(0)));
3443             } else {
3444                 rm = (modrm & 7) | REX_B(s);
3445                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3446                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3447             }
3448             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3449             break;
3450         case 0x7f: /* movq ea, mm */
3451             if (mod != 3) {
3452                 gen_lea_modrm(env, s, modrm);
3453                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3454             } else {
3455                 rm = (modrm & 7);
3456                 gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
3457                             offsetof(CPUX86State,fpregs[reg].mmx));
3458             }
3459             break;
3460         case 0x011: /* movups */
3461         case 0x111: /* movupd */
3462         case 0x029: /* movaps */
3463         case 0x129: /* movapd */
3464         case 0x17f: /* movdqa ea, xmm */
3465         case 0x27f: /* movdqu ea, xmm */
3466             if (mod != 3) {
3467                 gen_lea_modrm(env, s, modrm);
3468                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3469             } else {
3470                 rm = (modrm & 7) | REX_B(s);
3471                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
3472                             offsetof(CPUX86State,xmm_regs[reg]));
3473             }
3474             break;
3475         case 0x211: /* movss ea, xmm */
3476             if (mod != 3) {
3477                 gen_lea_modrm(env, s, modrm);
3478                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3479                                  offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3480                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3481             } else {
3482                 rm = (modrm & 7) | REX_B(s);
3483                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
3484                             offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3485             }
3486             break;
3487         case 0x311: /* movsd ea, xmm */
3488             if (mod != 3) {
3489                 gen_lea_modrm(env, s, modrm);
3490                 gen_stq_env_A0(s, offsetof(CPUX86State,
3491                                            xmm_regs[reg].ZMM_Q(0)));
3492             } else {
3493                 rm = (modrm & 7) | REX_B(s);
3494                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3495                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3496             }
3497             break;
3498         case 0x013: /* movlps */
3499         case 0x113: /* movlpd */
3500             if (mod != 3) {
3501                 gen_lea_modrm(env, s, modrm);
3502                 gen_stq_env_A0(s, offsetof(CPUX86State,
3503                                            xmm_regs[reg].ZMM_Q(0)));
3504             } else {
3505                 goto illegal_op;
3506             }
3507             break;
3508         case 0x017: /* movhps */
3509         case 0x117: /* movhpd */
3510             if (mod != 3) {
3511                 gen_lea_modrm(env, s, modrm);
3512                 gen_stq_env_A0(s, offsetof(CPUX86State,
3513                                            xmm_regs[reg].ZMM_Q(1)));
3514             } else {
3515                 goto illegal_op;
3516             }
3517             break;
3518         case 0x71: /* shift mm, im */
3519         case 0x72:
3520         case 0x73:
3521         case 0x171: /* shift xmm, im */
3522         case 0x172:
3523         case 0x173:
3524             if (b1 >= 2) {
3525                 goto unknown_op;
3526             }
3527             val = x86_ldub_code(env, s);
3528             if (is_xmm) {
3529                 tcg_gen_movi_tl(s->T0, val);
3530                 tcg_gen_st32_tl(s->T0, cpu_env,
3531                                 offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3532                 tcg_gen_movi_tl(s->T0, 0);
3533                 tcg_gen_st32_tl(s->T0, cpu_env,
3534                                 offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
3535                 op1_offset = offsetof(CPUX86State,xmm_t0);
3536             } else {
3537                 tcg_gen_movi_tl(s->T0, val);
3538                 tcg_gen_st32_tl(s->T0, cpu_env,
3539                                 offsetof(CPUX86State, mmx_t0.MMX_L(0)));
3540                 tcg_gen_movi_tl(s->T0, 0);
3541                 tcg_gen_st32_tl(s->T0, cpu_env,
3542                                 offsetof(CPUX86State, mmx_t0.MMX_L(1)));
3543                 op1_offset = offsetof(CPUX86State,mmx_t0);
3544             }
3545             sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3546                                        (((modrm >> 3)) & 7)][b1];
3547             if (!sse_fn_epp) {
3548                 goto unknown_op;
3549             }
3550             if (is_xmm) {
3551                 rm = (modrm & 7) | REX_B(s);
3552                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3553             } else {
3554                 rm = (modrm & 7);
3555                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3556             }
3557             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3558             tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
3559             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3560             break;
3561         case 0x050: /* movmskps */
3562             rm = (modrm & 7) | REX_B(s);
3563             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3564                              offsetof(CPUX86State,xmm_regs[rm]));
3565             gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
3566             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3567             break;
3568         case 0x150: /* movmskpd */
3569             rm = (modrm & 7) | REX_B(s);
3570             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3571                              offsetof(CPUX86State,xmm_regs[rm]));
3572             gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
3573             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3574             break;
3575         case 0x02a: /* cvtpi2ps */
3576         case 0x12a: /* cvtpi2pd */
3577             gen_helper_enter_mmx(cpu_env);
3578             if (mod != 3) {
3579                 gen_lea_modrm(env, s, modrm);
3580                 op2_offset = offsetof(CPUX86State,mmx_t0);
3581                 gen_ldq_env_A0(s, op2_offset);
3582             } else {
3583                 rm = (modrm & 7);
3584                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3585             }
3586             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3587             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3588             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3589             switch(b >> 8) {
3590             case 0x0:
3591                 gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
3592                 break;
3593             default:
3594             case 0x1:
3595                 gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
3596                 break;
3597             }
3598             break;
3599         case 0x22a: /* cvtsi2ss */
3600         case 0x32a: /* cvtsi2sd */
3601             ot = mo_64_32(s->dflag);
3602             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3603             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3604             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3605             if (ot == MO_32) {
3606                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3607                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3608                 sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
3609             } else {
3610 #ifdef TARGET_X86_64
3611                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3612                 sse_fn_epl(cpu_env, s->ptr0, s->T0);
3613 #else
3614                 goto illegal_op;
3615 #endif
3616             }
3617             break;
3618         case 0x02c: /* cvttps2pi */
3619         case 0x12c: /* cvttpd2pi */
3620         case 0x02d: /* cvtps2pi */
3621         case 0x12d: /* cvtpd2pi */
3622             gen_helper_enter_mmx(cpu_env);
3623             if (mod != 3) {
3624                 gen_lea_modrm(env, s, modrm);
3625                 op2_offset = offsetof(CPUX86State,xmm_t0);
3626                 gen_ldo_env_A0(s, op2_offset);
3627             } else {
3628                 rm = (modrm & 7) | REX_B(s);
3629                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3630             }
3631             op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3632             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3633             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3634             switch(b) {
3635             case 0x02c:
3636                 gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
3637                 break;
3638             case 0x12c:
3639                 gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
3640                 break;
3641             case 0x02d:
3642                 gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
3643                 break;
3644             case 0x12d:
3645                 gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
3646                 break;
3647             }
3648             break;
3649         case 0x22c: /* cvttss2si */
3650         case 0x32c: /* cvttsd2si */
3651         case 0x22d: /* cvtss2si */
3652         case 0x32d: /* cvtsd2si */
3653             ot = mo_64_32(s->dflag);
3654             if (mod != 3) {
3655                 gen_lea_modrm(env, s, modrm);
3656                 if ((b >> 8) & 1) {
3657                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3658                 } else {
3659                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
3660                     tcg_gen_st32_tl(s->T0, cpu_env,
3661                                     offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3662                 }
3663                 op2_offset = offsetof(CPUX86State,xmm_t0);
3664             } else {
3665                 rm = (modrm & 7) | REX_B(s);
3666                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3667             }
3668             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3669             if (ot == MO_32) {
3670                 SSEFunc_i_ep sse_fn_i_ep =
3671                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3672                 sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
3673                 tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
3674             } else {
3675 #ifdef TARGET_X86_64
3676                 SSEFunc_l_ep sse_fn_l_ep =
3677                     sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3678                 sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
3679 #else
3680                 goto illegal_op;
3681 #endif
3682             }
3683             gen_op_mov_reg_v(s, ot, reg, s->T0);
3684             break;
3685         case 0xc4: /* pinsrw */
3686         case 0x1c4:
3687             s->rip_offset = 1;
3688             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3689             val = x86_ldub_code(env, s);
3690             if (b1) {
3691                 val &= 7;
3692                 tcg_gen_st16_tl(s->T0, cpu_env,
3693                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3694             } else {
3695                 val &= 3;
3696                 tcg_gen_st16_tl(s->T0, cpu_env,
3697                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3698             }
3699             break;
3700         case 0xc5: /* pextrw */
3701         case 0x1c5:
3702             if (mod != 3)
3703                 goto illegal_op;
3704             ot = mo_64_32(s->dflag);
3705             val = x86_ldub_code(env, s);
3706             if (b1) {
3707                 val &= 7;
3708                 rm = (modrm & 7) | REX_B(s);
3709                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3710                                  offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3711             } else {
3712                 val &= 3;
3713                 rm = (modrm & 7);
3714                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3715                                 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3716             }
3717             reg = ((modrm >> 3) & 7) | REX_R(s);
3718             gen_op_mov_reg_v(s, ot, reg, s->T0);
3719             break;
3720         case 0x1d6: /* movq ea, xmm */
3721             if (mod != 3) {
3722                 gen_lea_modrm(env, s, modrm);
3723                 gen_stq_env_A0(s, offsetof(CPUX86State,
3724                                            xmm_regs[reg].ZMM_Q(0)));
3725             } else {
3726                 rm = (modrm & 7) | REX_B(s);
3727                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3728                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3729                 gen_op_movq_env_0(s,
3730                                   offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
3731             }
3732             break;
3733         case 0x2d6: /* movq2dq */
3734             gen_helper_enter_mmx(cpu_env);
3735             rm = (modrm & 7);
3736             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3737                         offsetof(CPUX86State,fpregs[rm].mmx));
3738             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3739             break;
3740         case 0x3d6: /* movdq2q */
3741             gen_helper_enter_mmx(cpu_env);
3742             rm = (modrm & 7) | REX_B(s);
3743             gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
3744                         offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3745             break;
3746         case 0xd7: /* pmovmskb */
3747         case 0x1d7:
3748             if (mod != 3)
3749                 goto illegal_op;
3750             if (b1) {
3751                 rm = (modrm & 7) | REX_B(s);
3752                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3753                                  offsetof(CPUX86State, xmm_regs[rm]));
3754                 gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3755             } else {
3756                 rm = (modrm & 7);
3757                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3758                                  offsetof(CPUX86State, fpregs[rm].mmx));
3759                 gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
3760             }
3761             reg = ((modrm >> 3) & 7) | REX_R(s);
3762             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3763             break;
3764 
3765         case 0x138:
3766         case 0x038:
3767             b = modrm;
3768             if ((b & 0xf0) == 0xf0) {
3769                 goto do_0f_38_fx;
3770             }
3771             modrm = x86_ldub_code(env, s);
3772             rm = modrm & 7;
3773             reg = ((modrm >> 3) & 7) | REX_R(s);
3774             mod = (modrm >> 6) & 3;
3775             if (b1 >= 2) {
3776                 goto unknown_op;
3777             }
3778 
3779             sse_fn_epp = sse_op_table6[b].op[b1];
3780             if (!sse_fn_epp) {
3781                 goto unknown_op;
3782             }
3783             if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3784                 goto illegal_op;
3785 
3786             if (b1) {
3787                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3788                 if (mod == 3) {
3789                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3790                 } else {
3791                     op2_offset = offsetof(CPUX86State,xmm_t0);
3792                     gen_lea_modrm(env, s, modrm);
3793                     switch (b) {
3794                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3795                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3796                     case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3797                         gen_ldq_env_A0(s, op2_offset +
3798                                         offsetof(ZMMReg, ZMM_Q(0)));
3799                         break;
3800                     case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3801                     case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3802                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
3803                                             s->mem_index, MO_LEUL);
3804                         tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
3805                                         offsetof(ZMMReg, ZMM_L(0)));
3806                         break;
3807                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3808                         tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
3809                                            s->mem_index, MO_LEUW);
3810                         tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
3811                                         offsetof(ZMMReg, ZMM_W(0)));
3812                         break;
3813                     case 0x2a:            /* movntdqa */
3814                         gen_ldo_env_A0(s, op1_offset);
3815                         return;
3816                     default:
3817                         gen_ldo_env_A0(s, op2_offset);
3818                     }
3819                 }
3820             } else {
3821                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3822                 if (mod == 3) {
3823                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3824                 } else {
3825                     op2_offset = offsetof(CPUX86State,mmx_t0);
3826                     gen_lea_modrm(env, s, modrm);
3827                     gen_ldq_env_A0(s, op2_offset);
3828                 }
3829             }
3830             if (sse_fn_epp == SSE_SPECIAL) {
3831                 goto unknown_op;
3832             }
3833 
3834             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3835             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3836             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3837 
3838             if (b == 0x17) {
3839                 set_cc_op(s, CC_OP_EFLAGS);
3840             }
3841             break;
3842 
3843         case 0x238:
3844         case 0x338:
3845         do_0f_38_fx:
3846             /* Various integer extensions at 0f 38 f[0-f].  */
3847             b = modrm | (b1 << 8);
3848             modrm = x86_ldub_code(env, s);
3849             reg = ((modrm >> 3) & 7) | REX_R(s);
3850 
3851             switch (b) {
3852             case 0x3f0: /* crc32 Gd,Eb */
3853             case 0x3f1: /* crc32 Gd,Ey */
3854             do_crc32:
3855                 if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3856                     goto illegal_op;
3857                 }
3858                 if ((b & 0xff) == 0xf0) {
3859                     ot = MO_8;
3860                 } else if (s->dflag != MO_64) {
3861                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3862                 } else {
3863                     ot = MO_64;
3864                 }
3865 
3866                 tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
3867                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3868                 gen_helper_crc32(s->T0, s->tmp2_i32,
3869                                  s->T0, tcg_const_i32(8 << ot));
3870 
3871                 ot = mo_64_32(s->dflag);
3872                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3873                 break;
3874 
3875             case 0x1f0: /* crc32 or movbe */
3876             case 0x1f1:
3877                 /* For these insns, the f2 prefix is supposed to have priority
3878                    over the 66 prefix, but that's not what we implement above
3879                    setting b1.  */
3880                 if (s->prefix & PREFIX_REPNZ) {
3881                     goto do_crc32;
3882                 }
3883                 /* FALLTHRU */
3884             case 0x0f0: /* movbe Gy,My */
3885             case 0x0f1: /* movbe My,Gy */
3886                 if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3887                     goto illegal_op;
3888                 }
3889                 if (s->dflag != MO_64) {
3890                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3891                 } else {
3892                     ot = MO_64;
3893                 }
3894 
3895                 gen_lea_modrm(env, s, modrm);
3896                 if ((b & 1) == 0) {
3897                     tcg_gen_qemu_ld_tl(s->T0, s->A0,
3898                                        s->mem_index, ot | MO_BE);
3899                     gen_op_mov_reg_v(s, ot, reg, s->T0);
3900                 } else {
3901                     tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
3902                                        s->mem_index, ot | MO_BE);
3903                 }
3904                 break;
3905 
3906             case 0x0f2: /* andn Gy, By, Ey */
3907                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3908                     || !(s->prefix & PREFIX_VEX)
3909                     || s->vex_l != 0) {
3910                     goto illegal_op;
3911                 }
3912                 ot = mo_64_32(s->dflag);
3913                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3914                 tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
3915                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3916                 gen_op_update1_cc(s);
3917                 set_cc_op(s, CC_OP_LOGICB + ot);
3918                 break;
3919 
3920             case 0x0f7: /* bextr Gy, Ey, By */
3921                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3922                     || !(s->prefix & PREFIX_VEX)
3923                     || s->vex_l != 0) {
3924                     goto illegal_op;
3925                 }
3926                 ot = mo_64_32(s->dflag);
3927                 {
3928                     TCGv bound, zero;
3929 
3930                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3931                     /* Extract START, and shift the operand.
3932                        Shifts larger than operand size get zeros.  */
3933                     tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
3934                     tcg_gen_shr_tl(s->T0, s->T0, s->A0);
3935 
3936                     bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3937                     zero = tcg_const_tl(0);
3938                     tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
3939                                        s->T0, zero);
3940                     tcg_temp_free(zero);
3941 
3942                     /* Extract the LEN into a mask.  Lengths larger than
3943                        operand size get all ones.  */
3944                     tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
3945                     tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
3946                                        s->A0, bound);
3947                     tcg_temp_free(bound);
3948                     tcg_gen_movi_tl(s->T1, 1);
3949                     tcg_gen_shl_tl(s->T1, s->T1, s->A0);
3950                     tcg_gen_subi_tl(s->T1, s->T1, 1);
3951                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
3952 
3953                     gen_op_mov_reg_v(s, ot, reg, s->T0);
3954                     gen_op_update1_cc(s);
3955                     set_cc_op(s, CC_OP_LOGICB + ot);
3956                 }
3957                 break;
3958 
3959             case 0x0f5: /* bzhi Gy, Ey, By */
3960                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3961                     || !(s->prefix & PREFIX_VEX)
3962                     || s->vex_l != 0) {
3963                     goto illegal_op;
3964                 }
3965                 ot = mo_64_32(s->dflag);
3966                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3967                 tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
3968                 {
3969                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3970                     /* Note that since we're using BMILG (in order to get O
3971                        cleared) we need to store the inverse into C.  */
3972                     tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3973                                        s->T1, bound);
3974                     tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
3975                                        bound, bound, s->T1);
3976                     tcg_temp_free(bound);
3977                 }
3978                 tcg_gen_movi_tl(s->A0, -1);
3979                 tcg_gen_shl_tl(s->A0, s->A0, s->T1);
3980                 tcg_gen_andc_tl(s->T0, s->T0, s->A0);
3981                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3982                 gen_op_update1_cc(s);
3983                 set_cc_op(s, CC_OP_BMILGB + ot);
3984                 break;
3985 
3986             case 0x3f6: /* mulx By, Gy, rdx, Ey */
3987                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3988                     || !(s->prefix & PREFIX_VEX)
3989                     || s->vex_l != 0) {
3990                     goto illegal_op;
3991                 }
3992                 ot = mo_64_32(s->dflag);
3993                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3994                 switch (ot) {
3995                 default:
3996                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3997                     tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
3998                     tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
3999                                       s->tmp2_i32, s->tmp3_i32);
4000                     tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
4001                     tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
4002                     break;
4003 #ifdef TARGET_X86_64
4004                 case MO_64:
4005                     tcg_gen_mulu2_i64(s->T0, s->T1,
4006                                       s->T0, cpu_regs[R_EDX]);
4007                     tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
4008                     tcg_gen_mov_i64(cpu_regs[reg], s->T1);
4009                     break;
4010 #endif
4011                 }
4012                 break;
4013 
4014             case 0x3f5: /* pdep Gy, By, Ey */
4015                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4016                     || !(s->prefix & PREFIX_VEX)
4017                     || s->vex_l != 0) {
4018                     goto illegal_op;
4019                 }
4020                 ot = mo_64_32(s->dflag);
4021                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4022                 /* Note that by zero-extending the source operand, we
4023                    automatically handle zero-extending the result.  */
4024                 if (ot == MO_64) {
4025                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
4026                 } else {
4027                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
4028                 }
4029                 gen_helper_pdep(cpu_regs[reg], s->T1, s->T0);
4030                 break;
4031 
4032             case 0x2f5: /* pext Gy, By, Ey */
4033                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4034                     || !(s->prefix & PREFIX_VEX)
4035                     || s->vex_l != 0) {
4036                     goto illegal_op;
4037                 }
4038                 ot = mo_64_32(s->dflag);
4039                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4040                 /* Note that by zero-extending the source operand, we
4041                    automatically handle zero-extending the result.  */
4042                 if (ot == MO_64) {
4043                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
4044                 } else {
4045                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
4046                 }
4047                 gen_helper_pext(cpu_regs[reg], s->T1, s->T0);
4048                 break;
4049 
4050             case 0x1f6: /* adcx Gy, Ey */
4051             case 0x2f6: /* adox Gy, Ey */
4052                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
4053                     goto illegal_op;
4054                 } else {
4055                     TCGv carry_in, carry_out, zero;
4056                     int end_op;
4057 
4058                     ot = mo_64_32(s->dflag);
4059                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4060 
4061                     /* Re-use the carry-out from a previous round.  */
4062                     carry_in = NULL;
4063                     carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
4064                     switch (s->cc_op) {
4065                     case CC_OP_ADCX:
4066                         if (b == 0x1f6) {
4067                             carry_in = cpu_cc_dst;
4068                             end_op = CC_OP_ADCX;
4069                         } else {
4070                             end_op = CC_OP_ADCOX;
4071                         }
4072                         break;
4073                     case CC_OP_ADOX:
4074                         if (b == 0x1f6) {
4075                             end_op = CC_OP_ADCOX;
4076                         } else {
4077                             carry_in = cpu_cc_src2;
4078                             end_op = CC_OP_ADOX;
4079                         }
4080                         break;
4081                     case CC_OP_ADCOX:
4082                         end_op = CC_OP_ADCOX;
4083                         carry_in = carry_out;
4084                         break;
4085                     default:
4086                         end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
4087                         break;
4088                     }
4089                     /* If we can't reuse carry-out, get it out of EFLAGS.  */
4090                     if (!carry_in) {
4091                         if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
4092                             gen_compute_eflags(s);
4093                         }
4094                         carry_in = s->tmp0;
4095                         tcg_gen_extract_tl(carry_in, cpu_cc_src,
4096                                            ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
4097                     }
4098 
4099                     switch (ot) {
4100 #ifdef TARGET_X86_64
4101                     case MO_32:
4102                         /* If we know TL is 64-bit, and we want a 32-bit
4103                            result, just do everything in 64-bit arithmetic.  */
4104                         tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
4105                         tcg_gen_ext32u_i64(s->T0, s->T0);
4106                         tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
4107                         tcg_gen_add_i64(s->T0, s->T0, carry_in);
4108                         tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
4109                         tcg_gen_shri_i64(carry_out, s->T0, 32);
4110                         break;
4111 #endif
4112                     default:
4113                         /* Otherwise compute the carry-out in two steps.  */
4114                         zero = tcg_const_tl(0);
4115                         tcg_gen_add2_tl(s->T0, carry_out,
4116                                         s->T0, zero,
4117                                         carry_in, zero);
4118                         tcg_gen_add2_tl(cpu_regs[reg], carry_out,
4119                                         cpu_regs[reg], carry_out,
4120                                         s->T0, zero);
4121                         tcg_temp_free(zero);
4122                         break;
4123                     }
4124                     set_cc_op(s, end_op);
4125                 }
4126                 break;
4127 
4128             case 0x1f7: /* shlx Gy, Ey, By */
4129             case 0x2f7: /* sarx Gy, Ey, By */
4130             case 0x3f7: /* shrx Gy, Ey, By */
4131                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4132                     || !(s->prefix & PREFIX_VEX)
4133                     || s->vex_l != 0) {
4134                     goto illegal_op;
4135                 }
4136                 ot = mo_64_32(s->dflag);
4137                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4138                 if (ot == MO_64) {
4139                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
4140                 } else {
4141                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
4142                 }
4143                 if (b == 0x1f7) {
4144                     tcg_gen_shl_tl(s->T0, s->T0, s->T1);
4145                 } else if (b == 0x2f7) {
4146                     if (ot != MO_64) {
4147                         tcg_gen_ext32s_tl(s->T0, s->T0);
4148                     }
4149                     tcg_gen_sar_tl(s->T0, s->T0, s->T1);
4150                 } else {
4151                     if (ot != MO_64) {
4152                         tcg_gen_ext32u_tl(s->T0, s->T0);
4153                     }
4154                     tcg_gen_shr_tl(s->T0, s->T0, s->T1);
4155                 }
4156                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4157                 break;
4158 
4159             case 0x0f3:
4160             case 0x1f3:
4161             case 0x2f3:
4162             case 0x3f3: /* Group 17 */
4163                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4164                     || !(s->prefix & PREFIX_VEX)
4165                     || s->vex_l != 0) {
4166                     goto illegal_op;
4167                 }
4168                 ot = mo_64_32(s->dflag);
4169                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4170 
4171                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
4172                 switch (reg & 7) {
4173                 case 1: /* blsr By,Ey */
4174                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4175                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4176                     break;
4177                 case 2: /* blsmsk By,Ey */
4178                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4179                     tcg_gen_xor_tl(s->T0, s->T0, s->T1);
4180                     break;
4181                 case 3: /* blsi By, Ey */
4182                     tcg_gen_neg_tl(s->T1, s->T0);
4183                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4184                     break;
4185                 default:
4186                     goto unknown_op;
4187                 }
4188                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4189                 gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
4190                 set_cc_op(s, CC_OP_BMILGB + ot);
4191                 break;
4192 
4193             default:
4194                 goto unknown_op;
4195             }
4196             break;
4197 
4198         case 0x03a:
4199         case 0x13a:
4200             b = modrm;
4201             modrm = x86_ldub_code(env, s);
4202             rm = modrm & 7;
4203             reg = ((modrm >> 3) & 7) | REX_R(s);
4204             mod = (modrm >> 6) & 3;
4205             if (b1 >= 2) {
4206                 goto unknown_op;
4207             }
4208 
4209             sse_fn_eppi = sse_op_table7[b].op[b1];
4210             if (!sse_fn_eppi) {
4211                 goto unknown_op;
4212             }
4213             if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
4214                 goto illegal_op;
4215 
4216             s->rip_offset = 1;
4217 
4218             if (sse_fn_eppi == SSE_SPECIAL) {
4219                 ot = mo_64_32(s->dflag);
4220                 rm = (modrm & 7) | REX_B(s);
4221                 if (mod != 3)
4222                     gen_lea_modrm(env, s, modrm);
4223                 reg = ((modrm >> 3) & 7) | REX_R(s);
4224                 val = x86_ldub_code(env, s);
4225                 switch (b) {
4226                 case 0x14: /* pextrb */
4227                     tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4228                                             xmm_regs[reg].ZMM_B(val & 15)));
4229                     if (mod == 3) {
4230                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4231                     } else {
4232                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4233                                            s->mem_index, MO_UB);
4234                     }
4235                     break;
4236                 case 0x15: /* pextrw */
4237                     tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4238                                             xmm_regs[reg].ZMM_W(val & 7)));
4239                     if (mod == 3) {
4240                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4241                     } else {
4242                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4243                                            s->mem_index, MO_LEUW);
4244                     }
4245                     break;
4246                 case 0x16:
4247                     if (ot == MO_32) { /* pextrd */
4248                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4249                                         offsetof(CPUX86State,
4250                                                 xmm_regs[reg].ZMM_L(val & 3)));
4251                         if (mod == 3) {
4252                             tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
4253                         } else {
4254                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
4255                                                 s->mem_index, MO_LEUL);
4256                         }
4257                     } else { /* pextrq */
4258 #ifdef TARGET_X86_64
4259                         tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
4260                                         offsetof(CPUX86State,
4261                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4262                         if (mod == 3) {
4263                             tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
4264                         } else {
4265                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
4266                                                 s->mem_index, MO_LEQ);
4267                         }
4268 #else
4269                         goto illegal_op;
4270 #endif
4271                     }
4272                     break;
4273                 case 0x17: /* extractps */
4274                     tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4275                                             xmm_regs[reg].ZMM_L(val & 3)));
4276                     if (mod == 3) {
4277                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4278                     } else {
4279                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4280                                            s->mem_index, MO_LEUL);
4281                     }
4282                     break;
4283                 case 0x20: /* pinsrb */
4284                     if (mod == 3) {
4285                         gen_op_mov_v_reg(s, MO_32, s->T0, rm);
4286                     } else {
4287                         tcg_gen_qemu_ld_tl(s->T0, s->A0,
4288                                            s->mem_index, MO_UB);
4289                     }
4290                     tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
4291                                             xmm_regs[reg].ZMM_B(val & 15)));
4292                     break;
4293                 case 0x21: /* insertps */
4294                     if (mod == 3) {
4295                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4296                                         offsetof(CPUX86State,xmm_regs[rm]
4297                                                 .ZMM_L((val >> 6) & 3)));
4298                     } else {
4299                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4300                                             s->mem_index, MO_LEUL);
4301                     }
4302                     tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4303                                     offsetof(CPUX86State,xmm_regs[reg]
4304                                             .ZMM_L((val >> 4) & 3)));
4305                     if ((val >> 0) & 1)
4306                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4307                                         cpu_env, offsetof(CPUX86State,
4308                                                 xmm_regs[reg].ZMM_L(0)));
4309                     if ((val >> 1) & 1)
4310                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4311                                         cpu_env, offsetof(CPUX86State,
4312                                                 xmm_regs[reg].ZMM_L(1)));
4313                     if ((val >> 2) & 1)
4314                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4315                                         cpu_env, offsetof(CPUX86State,
4316                                                 xmm_regs[reg].ZMM_L(2)));
4317                     if ((val >> 3) & 1)
4318                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4319                                         cpu_env, offsetof(CPUX86State,
4320                                                 xmm_regs[reg].ZMM_L(3)));
4321                     break;
4322                 case 0x22:
4323                     if (ot == MO_32) { /* pinsrd */
4324                         if (mod == 3) {
4325                             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
4326                         } else {
4327                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4328                                                 s->mem_index, MO_LEUL);
4329                         }
4330                         tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4331                                         offsetof(CPUX86State,
4332                                                 xmm_regs[reg].ZMM_L(val & 3)));
4333                     } else { /* pinsrq */
4334 #ifdef TARGET_X86_64
4335                         if (mod == 3) {
4336                             gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
4337                         } else {
4338                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
4339                                                 s->mem_index, MO_LEQ);
4340                         }
4341                         tcg_gen_st_i64(s->tmp1_i64, cpu_env,
4342                                         offsetof(CPUX86State,
4343                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4344 #else
4345                         goto illegal_op;
4346 #endif
4347                     }
4348                     break;
4349                 }
4350                 return;
4351             }
4352 
4353             if (b1) {
4354                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4355                 if (mod == 3) {
4356                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4357                 } else {
4358                     op2_offset = offsetof(CPUX86State,xmm_t0);
4359                     gen_lea_modrm(env, s, modrm);
4360                     gen_ldo_env_A0(s, op2_offset);
4361                 }
4362             } else {
4363                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4364                 if (mod == 3) {
4365                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4366                 } else {
4367                     op2_offset = offsetof(CPUX86State,mmx_t0);
4368                     gen_lea_modrm(env, s, modrm);
4369                     gen_ldq_env_A0(s, op2_offset);
4370                 }
4371             }
4372             val = x86_ldub_code(env, s);
4373 
4374             if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4375                 set_cc_op(s, CC_OP_EFLAGS);
4376 
4377                 if (s->dflag == MO_64) {
4378                     /* The helper must use entire 64-bit gp registers */
4379                     val |= 1 << 8;
4380                 }
4381             }
4382 
4383             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4384             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4385             sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
4386             break;
4387 
4388         case 0x33a:
4389             /* Various integer extensions at 0f 3a f[0-f].  */
4390             b = modrm | (b1 << 8);
4391             modrm = x86_ldub_code(env, s);
4392             reg = ((modrm >> 3) & 7) | REX_R(s);
4393 
4394             switch (b) {
4395             case 0x3f0: /* rorx Gy,Ey, Ib */
4396                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4397                     || !(s->prefix & PREFIX_VEX)
4398                     || s->vex_l != 0) {
4399                     goto illegal_op;
4400                 }
4401                 ot = mo_64_32(s->dflag);
4402                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4403                 b = x86_ldub_code(env, s);
4404                 if (ot == MO_64) {
4405                     tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
4406                 } else {
4407                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4408                     tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
4409                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
4410                 }
4411                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4412                 break;
4413 
4414             default:
4415                 goto unknown_op;
4416             }
4417             break;
4418 
4419         default:
4420         unknown_op:
4421             gen_unknown_opcode(env, s);
4422             return;
4423         }
4424     } else {
4425         /* generic MMX or SSE operation */
4426         switch(b) {
4427         case 0x70: /* pshufx insn */
4428         case 0xc6: /* pshufx insn */
4429         case 0xc2: /* compare insns */
4430             s->rip_offset = 1;
4431             break;
4432         default:
4433             break;
4434         }
4435         if (is_xmm) {
4436             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4437             if (mod != 3) {
4438                 int sz = 4;
4439 
4440                 gen_lea_modrm(env, s, modrm);
4441                 op2_offset = offsetof(CPUX86State,xmm_t0);
4442 
4443                 switch (b) {
4444                 case 0x50 ... 0x5a:
4445                 case 0x5c ... 0x5f:
4446                 case 0xc2:
4447                     /* Most sse scalar operations.  */
4448                     if (b1 == 2) {
4449                         sz = 2;
4450                     } else if (b1 == 3) {
4451                         sz = 3;
4452                     }
4453                     break;
4454 
4455                 case 0x2e:  /* ucomis[sd] */
4456                 case 0x2f:  /* comis[sd] */
4457                     if (b1 == 0) {
4458                         sz = 2;
4459                     } else {
4460                         sz = 3;
4461                     }
4462                     break;
4463                 }
4464 
4465                 switch (sz) {
4466                 case 2:
4467                     /* 32 bit access */
4468                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
4469                     tcg_gen_st32_tl(s->T0, cpu_env,
4470                                     offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4471                     break;
4472                 case 3:
4473                     /* 64 bit access */
4474                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4475                     break;
4476                 default:
4477                     /* 128 bit access */
4478                     gen_ldo_env_A0(s, op2_offset);
4479                     break;
4480                 }
4481             } else {
4482                 rm = (modrm & 7) | REX_B(s);
4483                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4484             }
4485         } else {
4486             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4487             if (mod != 3) {
4488                 gen_lea_modrm(env, s, modrm);
4489                 op2_offset = offsetof(CPUX86State,mmx_t0);
4490                 gen_ldq_env_A0(s, op2_offset);
4491             } else {
4492                 rm = (modrm & 7);
4493                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4494             }
4495         }
4496         switch(b) {
4497         case 0x0f: /* 3DNow! data insns */
4498             val = x86_ldub_code(env, s);
4499             sse_fn_epp = sse_op_table5[val];
4500             if (!sse_fn_epp) {
4501                 goto unknown_op;
4502             }
4503             if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4504                 goto illegal_op;
4505             }
4506             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4507             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4508             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4509             break;
4510         case 0x70: /* pshufx insn */
4511         case 0xc6: /* pshufx insn */
4512             val = x86_ldub_code(env, s);
4513             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4514             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4515             /* XXX: introduce a new table? */
4516             sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4517             sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
4518             break;
4519         case 0xc2:
4520             /* compare insns */
4521             val = x86_ldub_code(env, s);
4522             if (val >= 8)
4523                 goto unknown_op;
4524             sse_fn_epp = sse_op_table4[val][b1];
4525 
4526             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4527             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4528             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4529             break;
4530         case 0xf7:
4531             /* maskmov : we must prepare A0 */
4532             if (mod != 3)
4533                 goto illegal_op;
4534             tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4535             gen_extu(s->aflag, s->A0);
4536             gen_add_A0_ds_seg(s);
4537 
4538             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4539             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4540             /* XXX: introduce a new table? */
4541             sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4542             sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
4543             break;
4544         default:
4545             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4546             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4547             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4548             break;
4549         }
4550         if (b == 0x2e || b == 0x2f) {
4551             set_cc_op(s, CC_OP_EFLAGS);
4552         }
4553     }
4554 }
4555 
4556 /* Convert one instruction.  s->base.is_jmp is set if the translation must
4557    be stopped.  Return the next pc value.  */
4558 static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4559 {
4560     CPUX86State *env = cpu->env_ptr;
4561     int b, prefixes;
4562     int shift;
4563     MemOp ot, aflag, dflag;
4564     int modrm, reg, rm, mod, op, opreg, val;
4565     target_ulong next_eip, tval;
4566     target_ulong pc_start = s->base.pc_next;
4567 
4568     s->pc_start = s->pc = pc_start;
4569     s->override = -1;
4570 #ifdef TARGET_X86_64
4571     s->rex_w = false;
4572     s->rex_r = 0;
4573     s->rex_x = 0;
4574     s->rex_b = 0;
4575 #endif
4576     s->rip_offset = 0; /* for relative ip address */
4577     s->vex_l = 0;
4578     s->vex_v = 0;
4579     if (sigsetjmp(s->jmpbuf, 0) != 0) {
4580         gen_exception_gpf(s);
4581         return s->pc;
4582     }
4583 
4584     prefixes = 0;
4585 
4586  next_byte:
4587     b = x86_ldub_code(env, s);
4588     /* Collect prefixes.  */
4589     switch (b) {
4590     case 0xf3:
4591         prefixes |= PREFIX_REPZ;
4592         goto next_byte;
4593     case 0xf2:
4594         prefixes |= PREFIX_REPNZ;
4595         goto next_byte;
4596     case 0xf0:
4597         prefixes |= PREFIX_LOCK;
4598         goto next_byte;
4599     case 0x2e:
4600         s->override = R_CS;
4601         goto next_byte;
4602     case 0x36:
4603         s->override = R_SS;
4604         goto next_byte;
4605     case 0x3e:
4606         s->override = R_DS;
4607         goto next_byte;
4608     case 0x26:
4609         s->override = R_ES;
4610         goto next_byte;
4611     case 0x64:
4612         s->override = R_FS;
4613         goto next_byte;
4614     case 0x65:
4615         s->override = R_GS;
4616         goto next_byte;
4617     case 0x66:
4618         prefixes |= PREFIX_DATA;
4619         goto next_byte;
4620     case 0x67:
4621         prefixes |= PREFIX_ADR;
4622         goto next_byte;
4623 #ifdef TARGET_X86_64
4624     case 0x40 ... 0x4f:
4625         if (CODE64(s)) {
4626             /* REX prefix */
4627             prefixes |= PREFIX_REX;
4628             s->rex_w = (b >> 3) & 1;
4629             s->rex_r = (b & 0x4) << 1;
4630             s->rex_x = (b & 0x2) << 2;
4631             s->rex_b = (b & 0x1) << 3;
4632             goto next_byte;
4633         }
4634         break;
4635 #endif
4636     case 0xc5: /* 2-byte VEX */
4637     case 0xc4: /* 3-byte VEX */
4638         /* VEX prefixes cannot be used except in 32-bit mode.
4639            Otherwise the instruction is LES or LDS.  */
4640         if (CODE32(s) && !VM86(s)) {
4641             static const int pp_prefix[4] = {
4642                 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4643             };
4644             int vex3, vex2 = x86_ldub_code(env, s);
4645 
4646             if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4647                 /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4648                    otherwise the instruction is LES or LDS.  */
4649                 s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
4650                 break;
4651             }
4652 
4653             /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4654             if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4655                             | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
4656                 goto illegal_op;
4657             }
4658 #ifdef TARGET_X86_64
4659             s->rex_r = (~vex2 >> 4) & 8;
4660 #endif
4661             if (b == 0xc5) {
4662                 /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4663                 vex3 = vex2;
4664                 b = x86_ldub_code(env, s) | 0x100;
4665             } else {
4666                 /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4667                 vex3 = x86_ldub_code(env, s);
4668 #ifdef TARGET_X86_64
4669                 s->rex_x = (~vex2 >> 3) & 8;
4670                 s->rex_b = (~vex2 >> 2) & 8;
4671                 s->rex_w = (vex3 >> 7) & 1;
4672 #endif
4673                 switch (vex2 & 0x1f) {
4674                 case 0x01: /* Implied 0f leading opcode bytes.  */
4675                     b = x86_ldub_code(env, s) | 0x100;
4676                     break;
4677                 case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4678                     b = 0x138;
4679                     break;
4680                 case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4681                     b = 0x13a;
4682                     break;
4683                 default:   /* Reserved for future use.  */
4684                     goto unknown_op;
4685                 }
4686             }
4687             s->vex_v = (~vex3 >> 3) & 0xf;
4688             s->vex_l = (vex3 >> 2) & 1;
4689             prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4690         }
4691         break;
4692     }
4693 
4694     /* Post-process prefixes.  */
4695     if (CODE64(s)) {
4696         /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4697            data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4698            over 0x66 if both are present.  */
4699         dflag = (REX_W(s) ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4700         /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4701         aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4702     } else {
4703         /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4704         if (CODE32(s) ^ ((prefixes & PREFIX_DATA) != 0)) {
4705             dflag = MO_32;
4706         } else {
4707             dflag = MO_16;
4708         }
4709         /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4710         if (CODE32(s) ^ ((prefixes & PREFIX_ADR) != 0)) {
4711             aflag = MO_32;
4712         }  else {
4713             aflag = MO_16;
4714         }
4715     }
4716 
4717     s->prefix = prefixes;
4718     s->aflag = aflag;
4719     s->dflag = dflag;
4720 
4721     /* now check op code */
4722  reswitch:
4723     switch(b) {
4724     case 0x0f:
4725         /**************************/
4726         /* extended op code */
4727         b = x86_ldub_code(env, s) | 0x100;
4728         goto reswitch;
4729 
4730         /**************************/
4731         /* arith & logic */
4732     case 0x00 ... 0x05:
4733     case 0x08 ... 0x0d:
4734     case 0x10 ... 0x15:
4735     case 0x18 ... 0x1d:
4736     case 0x20 ... 0x25:
4737     case 0x28 ... 0x2d:
4738     case 0x30 ... 0x35:
4739     case 0x38 ... 0x3d:
4740         {
4741             int op, f, val;
4742             op = (b >> 3) & 7;
4743             f = (b >> 1) & 3;
4744 
4745             ot = mo_b_d(b, dflag);
4746 
4747             switch(f) {
4748             case 0: /* OP Ev, Gv */
4749                 modrm = x86_ldub_code(env, s);
4750                 reg = ((modrm >> 3) & 7) | REX_R(s);
4751                 mod = (modrm >> 6) & 3;
4752                 rm = (modrm & 7) | REX_B(s);
4753                 if (mod != 3) {
4754                     gen_lea_modrm(env, s, modrm);
4755                     opreg = OR_TMP0;
4756                 } else if (op == OP_XORL && rm == reg) {
4757                 xor_zero:
4758                     /* xor reg, reg optimisation */
4759                     set_cc_op(s, CC_OP_CLR);
4760                     tcg_gen_movi_tl(s->T0, 0);
4761                     gen_op_mov_reg_v(s, ot, reg, s->T0);
4762                     break;
4763                 } else {
4764                     opreg = rm;
4765                 }
4766                 gen_op_mov_v_reg(s, ot, s->T1, reg);
4767                 gen_op(s, op, ot, opreg);
4768                 break;
4769             case 1: /* OP Gv, Ev */
4770                 modrm = x86_ldub_code(env, s);
4771                 mod = (modrm >> 6) & 3;
4772                 reg = ((modrm >> 3) & 7) | REX_R(s);
4773                 rm = (modrm & 7) | REX_B(s);
4774                 if (mod != 3) {
4775                     gen_lea_modrm(env, s, modrm);
4776                     gen_op_ld_v(s, ot, s->T1, s->A0);
4777                 } else if (op == OP_XORL && rm == reg) {
4778                     goto xor_zero;
4779                 } else {
4780                     gen_op_mov_v_reg(s, ot, s->T1, rm);
4781                 }
4782                 gen_op(s, op, ot, reg);
4783                 break;
4784             case 2: /* OP A, Iv */
4785                 val = insn_get(env, s, ot);
4786                 tcg_gen_movi_tl(s->T1, val);
4787                 gen_op(s, op, ot, OR_EAX);
4788                 break;
4789             }
4790         }
4791         break;
4792 
4793     case 0x82:
4794         if (CODE64(s))
4795             goto illegal_op;
4796         /* fall through */
4797     case 0x80: /* GRP1 */
4798     case 0x81:
4799     case 0x83:
4800         {
4801             int val;
4802 
4803             ot = mo_b_d(b, dflag);
4804 
4805             modrm = x86_ldub_code(env, s);
4806             mod = (modrm >> 6) & 3;
4807             rm = (modrm & 7) | REX_B(s);
4808             op = (modrm >> 3) & 7;
4809 
4810             if (mod != 3) {
4811                 if (b == 0x83)
4812                     s->rip_offset = 1;
4813                 else
4814                     s->rip_offset = insn_const_size(ot);
4815                 gen_lea_modrm(env, s, modrm);
4816                 opreg = OR_TMP0;
4817             } else {
4818                 opreg = rm;
4819             }
4820 
4821             switch(b) {
4822             default:
4823             case 0x80:
4824             case 0x81:
4825             case 0x82:
4826                 val = insn_get(env, s, ot);
4827                 break;
4828             case 0x83:
4829                 val = (int8_t)insn_get(env, s, MO_8);
4830                 break;
4831             }
4832             tcg_gen_movi_tl(s->T1, val);
4833             gen_op(s, op, ot, opreg);
4834         }
4835         break;
4836 
4837         /**************************/
4838         /* inc, dec, and other misc arith */
4839     case 0x40 ... 0x47: /* inc Gv */
4840         ot = dflag;
4841         gen_inc(s, ot, OR_EAX + (b & 7), 1);
4842         break;
4843     case 0x48 ... 0x4f: /* dec Gv */
4844         ot = dflag;
4845         gen_inc(s, ot, OR_EAX + (b & 7), -1);
4846         break;
4847     case 0xf6: /* GRP3 */
4848     case 0xf7:
4849         ot = mo_b_d(b, dflag);
4850 
4851         modrm = x86_ldub_code(env, s);
4852         mod = (modrm >> 6) & 3;
4853         rm = (modrm & 7) | REX_B(s);
4854         op = (modrm >> 3) & 7;
4855         if (mod != 3) {
4856             if (op == 0) {
4857                 s->rip_offset = insn_const_size(ot);
4858             }
4859             gen_lea_modrm(env, s, modrm);
4860             /* For those below that handle locked memory, don't load here.  */
4861             if (!(s->prefix & PREFIX_LOCK)
4862                 || op != 2) {
4863                 gen_op_ld_v(s, ot, s->T0, s->A0);
4864             }
4865         } else {
4866             gen_op_mov_v_reg(s, ot, s->T0, rm);
4867         }
4868 
4869         switch(op) {
4870         case 0: /* test */
4871             val = insn_get(env, s, ot);
4872             tcg_gen_movi_tl(s->T1, val);
4873             gen_op_testl_T0_T1_cc(s);
4874             set_cc_op(s, CC_OP_LOGICB + ot);
4875             break;
4876         case 2: /* not */
4877             if (s->prefix & PREFIX_LOCK) {
4878                 if (mod == 3) {
4879                     goto illegal_op;
4880                 }
4881                 tcg_gen_movi_tl(s->T0, ~0);
4882                 tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
4883                                             s->mem_index, ot | MO_LE);
4884             } else {
4885                 tcg_gen_not_tl(s->T0, s->T0);
4886                 if (mod != 3) {
4887                     gen_op_st_v(s, ot, s->T0, s->A0);
4888                 } else {
4889                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4890                 }
4891             }
4892             break;
4893         case 3: /* neg */
4894             if (s->prefix & PREFIX_LOCK) {
4895                 TCGLabel *label1;
4896                 TCGv a0, t0, t1, t2;
4897 
4898                 if (mod == 3) {
4899                     goto illegal_op;
4900                 }
4901                 a0 = tcg_temp_local_new();
4902                 t0 = tcg_temp_local_new();
4903                 label1 = gen_new_label();
4904 
4905                 tcg_gen_mov_tl(a0, s->A0);
4906                 tcg_gen_mov_tl(t0, s->T0);
4907 
4908                 gen_set_label(label1);
4909                 t1 = tcg_temp_new();
4910                 t2 = tcg_temp_new();
4911                 tcg_gen_mov_tl(t2, t0);
4912                 tcg_gen_neg_tl(t1, t0);
4913                 tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4914                                           s->mem_index, ot | MO_LE);
4915                 tcg_temp_free(t1);
4916                 tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4917 
4918                 tcg_temp_free(t2);
4919                 tcg_temp_free(a0);
4920                 tcg_gen_mov_tl(s->T0, t0);
4921                 tcg_temp_free(t0);
4922             } else {
4923                 tcg_gen_neg_tl(s->T0, s->T0);
4924                 if (mod != 3) {
4925                     gen_op_st_v(s, ot, s->T0, s->A0);
4926                 } else {
4927                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4928                 }
4929             }
4930             gen_op_update_neg_cc(s);
4931             set_cc_op(s, CC_OP_SUBB + ot);
4932             break;
4933         case 4: /* mul */
4934             switch(ot) {
4935             case MO_8:
4936                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4937                 tcg_gen_ext8u_tl(s->T0, s->T0);
4938                 tcg_gen_ext8u_tl(s->T1, s->T1);
4939                 /* XXX: use 32 bit mul which could be faster */
4940                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4941                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4942                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4943                 tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
4944                 set_cc_op(s, CC_OP_MULB);
4945                 break;
4946             case MO_16:
4947                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4948                 tcg_gen_ext16u_tl(s->T0, s->T0);
4949                 tcg_gen_ext16u_tl(s->T1, s->T1);
4950                 /* XXX: use 32 bit mul which could be faster */
4951                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4952                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4953                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4954                 tcg_gen_shri_tl(s->T0, s->T0, 16);
4955                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4956                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
4957                 set_cc_op(s, CC_OP_MULW);
4958                 break;
4959             default:
4960             case MO_32:
4961                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4962                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4963                 tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4964                                   s->tmp2_i32, s->tmp3_i32);
4965                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4966                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4967                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4968                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4969                 set_cc_op(s, CC_OP_MULL);
4970                 break;
4971 #ifdef TARGET_X86_64
4972             case MO_64:
4973                 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4974                                   s->T0, cpu_regs[R_EAX]);
4975                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4976                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4977                 set_cc_op(s, CC_OP_MULQ);
4978                 break;
4979 #endif
4980             }
4981             break;
4982         case 5: /* imul */
4983             switch(ot) {
4984             case MO_8:
4985                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4986                 tcg_gen_ext8s_tl(s->T0, s->T0);
4987                 tcg_gen_ext8s_tl(s->T1, s->T1);
4988                 /* XXX: use 32 bit mul which could be faster */
4989                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4990                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4991                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4992                 tcg_gen_ext8s_tl(s->tmp0, s->T0);
4993                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4994                 set_cc_op(s, CC_OP_MULB);
4995                 break;
4996             case MO_16:
4997                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4998                 tcg_gen_ext16s_tl(s->T0, s->T0);
4999                 tcg_gen_ext16s_tl(s->T1, s->T1);
5000                 /* XXX: use 32 bit mul which could be faster */
5001                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5002                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5003                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5004                 tcg_gen_ext16s_tl(s->tmp0, s->T0);
5005                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5006                 tcg_gen_shri_tl(s->T0, s->T0, 16);
5007                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5008                 set_cc_op(s, CC_OP_MULW);
5009                 break;
5010             default:
5011             case MO_32:
5012                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5013                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
5014                 tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5015                                   s->tmp2_i32, s->tmp3_i32);
5016                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
5017                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
5018                 tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5019                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5020                 tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5021                 tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5022                 set_cc_op(s, CC_OP_MULL);
5023                 break;
5024 #ifdef TARGET_X86_64
5025             case MO_64:
5026                 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
5027                                   s->T0, cpu_regs[R_EAX]);
5028                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5029                 tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
5030                 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
5031                 set_cc_op(s, CC_OP_MULQ);
5032                 break;
5033 #endif
5034             }
5035             break;
5036         case 6: /* div */
5037             switch(ot) {
5038             case MO_8:
5039                 gen_helper_divb_AL(cpu_env, s->T0);
5040                 break;
5041             case MO_16:
5042                 gen_helper_divw_AX(cpu_env, s->T0);
5043                 break;
5044             default:
5045             case MO_32:
5046                 gen_helper_divl_EAX(cpu_env, s->T0);
5047                 break;
5048 #ifdef TARGET_X86_64
5049             case MO_64:
5050                 gen_helper_divq_EAX(cpu_env, s->T0);
5051                 break;
5052 #endif
5053             }
5054             break;
5055         case 7: /* idiv */
5056             switch(ot) {
5057             case MO_8:
5058                 gen_helper_idivb_AL(cpu_env, s->T0);
5059                 break;
5060             case MO_16:
5061                 gen_helper_idivw_AX(cpu_env, s->T0);
5062                 break;
5063             default:
5064             case MO_32:
5065                 gen_helper_idivl_EAX(cpu_env, s->T0);
5066                 break;
5067 #ifdef TARGET_X86_64
5068             case MO_64:
5069                 gen_helper_idivq_EAX(cpu_env, s->T0);
5070                 break;
5071 #endif
5072             }
5073             break;
5074         default:
5075             goto unknown_op;
5076         }
5077         break;
5078 
5079     case 0xfe: /* GRP4 */
5080     case 0xff: /* GRP5 */
5081         ot = mo_b_d(b, dflag);
5082 
5083         modrm = x86_ldub_code(env, s);
5084         mod = (modrm >> 6) & 3;
5085         rm = (modrm & 7) | REX_B(s);
5086         op = (modrm >> 3) & 7;
5087         if (op >= 2 && b == 0xfe) {
5088             goto unknown_op;
5089         }
5090         if (CODE64(s)) {
5091             if (op == 2 || op == 4) {
5092                 /* operand size for jumps is 64 bit */
5093                 ot = MO_64;
5094             } else if (op == 3 || op == 5) {
5095                 ot = dflag != MO_16 ? MO_32 + REX_W(s) : MO_16;
5096             } else if (op == 6) {
5097                 /* default push size is 64 bit */
5098                 ot = mo_pushpop(s, dflag);
5099             }
5100         }
5101         if (mod != 3) {
5102             gen_lea_modrm(env, s, modrm);
5103             if (op >= 2 && op != 3 && op != 5)
5104                 gen_op_ld_v(s, ot, s->T0, s->A0);
5105         } else {
5106             gen_op_mov_v_reg(s, ot, s->T0, rm);
5107         }
5108 
5109         switch(op) {
5110         case 0: /* inc Ev */
5111             if (mod != 3)
5112                 opreg = OR_TMP0;
5113             else
5114                 opreg = rm;
5115             gen_inc(s, ot, opreg, 1);
5116             break;
5117         case 1: /* dec Ev */
5118             if (mod != 3)
5119                 opreg = OR_TMP0;
5120             else
5121                 opreg = rm;
5122             gen_inc(s, ot, opreg, -1);
5123             break;
5124         case 2: /* call Ev */
5125             /* XXX: optimize if memory (no 'and' is necessary) */
5126             if (dflag == MO_16) {
5127                 tcg_gen_ext16u_tl(s->T0, s->T0);
5128             }
5129             next_eip = s->pc - s->cs_base;
5130             tcg_gen_movi_tl(s->T1, next_eip);
5131             gen_push_v(s, s->T1);
5132             gen_op_jmp_v(s->T0);
5133             gen_bnd_jmp(s);
5134             gen_jr(s, s->T0);
5135             break;
5136         case 3: /* lcall Ev */
5137             if (mod == 3) {
5138                 goto illegal_op;
5139             }
5140             gen_op_ld_v(s, ot, s->T1, s->A0);
5141             gen_add_A0_im(s, 1 << ot);
5142             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5143         do_lcall:
5144             if (PE(s) && !VM86(s)) {
5145                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5146                 gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
5147                                            tcg_const_i32(dflag - 1),
5148                                            tcg_const_tl(s->pc - s->cs_base));
5149             } else {
5150                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5151                 gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
5152                                       tcg_const_i32(dflag - 1),
5153                                       tcg_const_i32(s->pc - s->cs_base));
5154             }
5155             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5156             gen_jr(s, s->tmp4);
5157             break;
5158         case 4: /* jmp Ev */
5159             if (dflag == MO_16) {
5160                 tcg_gen_ext16u_tl(s->T0, s->T0);
5161             }
5162             gen_op_jmp_v(s->T0);
5163             gen_bnd_jmp(s);
5164             gen_jr(s, s->T0);
5165             break;
5166         case 5: /* ljmp Ev */
5167             if (mod == 3) {
5168                 goto illegal_op;
5169             }
5170             gen_op_ld_v(s, ot, s->T1, s->A0);
5171             gen_add_A0_im(s, 1 << ot);
5172             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5173         do_ljmp:
5174             if (PE(s) && !VM86(s)) {
5175                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5176                 gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
5177                                           tcg_const_tl(s->pc - s->cs_base));
5178             } else {
5179                 gen_op_movl_seg_T0_vm(s, R_CS);
5180                 gen_op_jmp_v(s->T1);
5181             }
5182             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5183             gen_jr(s, s->tmp4);
5184             break;
5185         case 6: /* push Ev */
5186             gen_push_v(s, s->T0);
5187             break;
5188         default:
5189             goto unknown_op;
5190         }
5191         break;
5192 
5193     case 0x84: /* test Ev, Gv */
5194     case 0x85:
5195         ot = mo_b_d(b, dflag);
5196 
5197         modrm = x86_ldub_code(env, s);
5198         reg = ((modrm >> 3) & 7) | REX_R(s);
5199 
5200         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5201         gen_op_mov_v_reg(s, ot, s->T1, reg);
5202         gen_op_testl_T0_T1_cc(s);
5203         set_cc_op(s, CC_OP_LOGICB + ot);
5204         break;
5205 
5206     case 0xa8: /* test eAX, Iv */
5207     case 0xa9:
5208         ot = mo_b_d(b, dflag);
5209         val = insn_get(env, s, ot);
5210 
5211         gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
5212         tcg_gen_movi_tl(s->T1, val);
5213         gen_op_testl_T0_T1_cc(s);
5214         set_cc_op(s, CC_OP_LOGICB + ot);
5215         break;
5216 
5217     case 0x98: /* CBW/CWDE/CDQE: sign-extend the low half of eAX into eAX */
5218         switch (dflag) {
5219 #ifdef TARGET_X86_64
5220         case MO_64: /* CDQE: 32-bit EAX -> 64-bit RAX */
5221             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5222             tcg_gen_ext32s_tl(s->T0, s->T0);
5223             gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
5224             break;
5225 #endif
5226         case MO_32: /* CWDE: 16-bit AX -> 32-bit EAX */
5227             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5228             tcg_gen_ext16s_tl(s->T0, s->T0);
5229             gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
5230             break;
5231         case MO_16: /* CBW: 8-bit AL -> 16-bit AX */
5232             gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
5233             tcg_gen_ext8s_tl(s->T0, s->T0);
5234             gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5235             break;
5236         default: /* any other operand size aborts translation */
5237             tcg_abort();
5238         }
5239         break;
5240     case 0x99: /* CWD/CDQ/CQO: fill eDX with copies of the sign bit of eAX */
5241         switch (dflag) {
5242 #ifdef TARGET_X86_64
5243         case MO_64: /* CQO: RDX = RAX >> 63 (arithmetic) */
5244             gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
5245             tcg_gen_sari_tl(s->T0, s->T0, 63);
5246             gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
5247             break;
5248 #endif
5249         case MO_32: /* CDQ: EDX = sign of EAX */
5250             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5251             tcg_gen_ext32s_tl(s->T0, s->T0); /* sign-extend first so the shift sees bit 31 */
5252             tcg_gen_sari_tl(s->T0, s->T0, 31);
5253             gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
5254             break;
5255         case MO_16: /* CWD: DX = sign of AX */
5256             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5257             tcg_gen_ext16s_tl(s->T0, s->T0); /* sign-extend first so the shift sees bit 15 */
5258             tcg_gen_sari_tl(s->T0, s->T0, 15);
5259             gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5260             break;
5261         default: /* any other operand size aborts translation */
5262             tcg_abort();
5263         }
5264         break;
5265     case 0x1af: /* imul Gv, Ev */
5266     case 0x69: /* imul Gv, Ev, I */
5267     case 0x6b:
5268         ot = dflag;
5269         modrm = x86_ldub_code(env, s);
5270         reg = ((modrm >> 3) & 7) | REX_R(s);
5271         if (b == 0x69)
5272             s->rip_offset = insn_const_size(ot);
5273         else if (b == 0x6b)
5274             s->rip_offset = 1;
5275         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5276         if (b == 0x69) {
5277             val = insn_get(env, s, ot);
5278             tcg_gen_movi_tl(s->T1, val);
5279         } else if (b == 0x6b) {
5280             val = (int8_t)insn_get(env, s, MO_8);
5281             tcg_gen_movi_tl(s->T1, val);
5282         } else {
5283             gen_op_mov_v_reg(s, ot, s->T1, reg);
5284         }
5285         switch (ot) {
5286 #ifdef TARGET_X86_64
5287         case MO_64:
5288             tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
5289             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5290             tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5291             tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
5292             break;
5293 #endif
5294         case MO_32:
5295             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5296             tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
5297             tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5298                               s->tmp2_i32, s->tmp3_i32);
5299             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
5300             tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5301             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5302             tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5303             tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5304             break;
5305         default:
5306             tcg_gen_ext16s_tl(s->T0, s->T0);
5307             tcg_gen_ext16s_tl(s->T1, s->T1);
5308             /* XXX: use 32 bit mul which could be faster */
5309             tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5310             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5311             tcg_gen_ext16s_tl(s->tmp0, s->T0);
5312             tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5313             gen_op_mov_reg_v(s, ot, reg, s->T0);
5314             break;
5315         }
5316         set_cc_op(s, CC_OP_MULB + ot);
5317         break;
5318     case 0x1c0:
5319     case 0x1c1: /* xadd Ev, Gv */
5320         ot = mo_b_d(b, dflag);
5321         modrm = x86_ldub_code(env, s);
5322         reg = ((modrm >> 3) & 7) | REX_R(s);
5323         mod = (modrm >> 6) & 3;
5324         gen_op_mov_v_reg(s, ot, s->T0, reg);
5325         if (mod == 3) {
5326             rm = (modrm & 7) | REX_B(s);
5327             gen_op_mov_v_reg(s, ot, s->T1, rm);
5328             tcg_gen_add_tl(s->T0, s->T0, s->T1);
5329             gen_op_mov_reg_v(s, ot, reg, s->T1);
5330             gen_op_mov_reg_v(s, ot, rm, s->T0);
5331         } else {
5332             gen_lea_modrm(env, s, modrm);
5333             if (s->prefix & PREFIX_LOCK) {
5334                 tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
5335                                             s->mem_index, ot | MO_LE);
5336                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5337             } else {
5338                 gen_op_ld_v(s, ot, s->T1, s->A0);
5339                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5340                 gen_op_st_v(s, ot, s->T0, s->A0);
5341             }
5342             gen_op_mov_reg_v(s, ot, reg, s->T1);
5343         }
5344         gen_op_update2_cc(s);
5345         set_cc_op(s, CC_OP_ADDB + ot);
5346         break;
5347     case 0x1b0:
5348     case 0x1b1: /* cmpxchg Ev, Gv */
5349         {
5350             TCGv oldv, newv, cmpv;
5351 
5352             ot = mo_b_d(b, dflag);
5353             modrm = x86_ldub_code(env, s);
5354             reg = ((modrm >> 3) & 7) | REX_R(s);
5355             mod = (modrm >> 6) & 3;
5356             oldv = tcg_temp_new();
5357             newv = tcg_temp_new();
5358             cmpv = tcg_temp_new();
5359             gen_op_mov_v_reg(s, ot, newv, reg);
5360             tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5361 
5362             if (s->prefix & PREFIX_LOCK) {
5363                 if (mod == 3) {
5364                     goto illegal_op;
5365                 }
5366                 gen_lea_modrm(env, s, modrm);
5367                 tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
5368                                           s->mem_index, ot | MO_LE);
5369                 gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5370             } else {
5371                 if (mod == 3) {
5372                     rm = (modrm & 7) | REX_B(s);
5373                     gen_op_mov_v_reg(s, ot, oldv, rm);
5374                 } else {
5375                     gen_lea_modrm(env, s, modrm);
5376                     gen_op_ld_v(s, ot, oldv, s->A0);
5377                     rm = 0; /* avoid warning */
5378                 }
5379                 gen_extu(ot, oldv);
5380                 gen_extu(ot, cmpv);
5381                 /* store value = (old == cmp ? new : old);  */
5382                 tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5383                 if (mod == 3) {
5384                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5385                     gen_op_mov_reg_v(s, ot, rm, newv);
5386                 } else {
5387                     /* Perform an unconditional store cycle like physical cpu;
5388                        must be before changing accumulator to ensure
5389                        idempotency if the store faults and the instruction
5390                        is restarted */
5391                     gen_op_st_v(s, ot, newv, s->A0);
5392                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5393                 }
5394             }
5395             tcg_gen_mov_tl(cpu_cc_src, oldv);
5396             tcg_gen_mov_tl(s->cc_srcT, cmpv);
5397             tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5398             set_cc_op(s, CC_OP_SUBB + ot);
5399             tcg_temp_free(oldv);
5400             tcg_temp_free(newv);
5401             tcg_temp_free(cmpv);
5402         }
5403         break;
5404     case 0x1c7: /* 0F C7 group: cmpxchg8b/cmpxchg16b, rdrand, rdseed (selected by modrm reg field) */
5405         modrm = x86_ldub_code(env, s);
5406         mod = (modrm >> 6) & 3;
5407         switch ((modrm >> 3) & 7) {
5408         case 1: /* CMPXCHG8B, CMPXCHG16B: memory operand only */
5409             if (mod == 3) {
5410                 goto illegal_op;
5411             }
5412 #ifdef TARGET_X86_64
5413             if (dflag == MO_64) { /* 64-bit operand size selects the 16-byte form */
5414                 if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
5415                     goto illegal_op;
5416                 }
5417                 gen_lea_modrm(env, s, modrm);
5418                 if ((s->prefix & PREFIX_LOCK) && /* locked helper only with LOCK and CF_PARALLEL */
5419                     (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5420                     gen_helper_cmpxchg16b(cpu_env, s->A0);
5421                 } else {
5422                     gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
5423                 }
5424                 set_cc_op(s, CC_OP_EFLAGS);
5425                 break;
5426             }
5427 #endif
5428             if (!(s->cpuid_features & CPUID_CX8)) { /* 8-byte form requires the CX8 feature bit */
5429                 goto illegal_op;
5430             }
5431             gen_lea_modrm(env, s, modrm);
5432             if ((s->prefix & PREFIX_LOCK) && /* locked helper only with LOCK and CF_PARALLEL */
5433                 (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5434                 gen_helper_cmpxchg8b(cpu_env, s->A0);
5435             } else {
5436                 gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
5437             }
5438             set_cc_op(s, CC_OP_EFLAGS);
5439             break;
5440 
5441         case 7: /* RDSEED (shares the RDRAND path below) */
5442         case 6: /* RDRAND: register operand only, no LOCK/REP prefixes */
5443             if (mod != 3 ||
5444                 (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
5445                 !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) { /* NOTE(review): RDSEED is also gated on the RDRAND bit and uses the rdrand helper - confirm intended */
5446                 goto illegal_op;
5447             }
5448             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5449                 gen_io_start();
5450             }
5451             gen_helper_rdrand(s->T0, cpu_env);
5452             rm = (modrm & 7) | REX_B(s);
5453             gen_op_mov_reg_v(s, dflag, rm, s->T0);
5454             set_cc_op(s, CC_OP_EFLAGS);
5455             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { /* under icount, jump to the next instruction after the helper */
5456                 gen_jmp(s, s->pc - s->cs_base);
5457             }
5458             break;
5459 
5460         default:
5461             goto illegal_op;
5462         }
5463         break;
5464 
5465         /**************************/
5466         /* push/pop */
5467     case 0x50 ... 0x57: /* push */
5468         gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
5469         gen_push_v(s, s->T0);
5470         break;
5471     case 0x58 ... 0x5f: /* pop */
5472         ot = gen_pop_T0(s);
5473         /* NOTE: order is important for pop %sp */
5474         gen_pop_update(s, ot);
5475         gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
5476         break;
5477     case 0x60: /* pusha */
5478         if (CODE64(s))
5479             goto illegal_op;
5480         gen_pusha(s);
5481         break;
5482     case 0x61: /* popa */
5483         if (CODE64(s))
5484             goto illegal_op;
5485         gen_popa(s);
5486         break;
5487     case 0x68: /* push Iv */
5488     case 0x6a:
5489         ot = mo_pushpop(s, dflag);
5490         if (b == 0x68)
5491             val = insn_get(env, s, ot);
5492         else
5493             val = (int8_t)insn_get(env, s, MO_8);
5494         tcg_gen_movi_tl(s->T0, val);
5495         gen_push_v(s, s->T0);
5496         break;
5497     case 0x8f: /* pop Ev */
5498         modrm = x86_ldub_code(env, s);
5499         mod = (modrm >> 6) & 3;
5500         ot = gen_pop_T0(s);
5501         if (mod == 3) {
5502             /* NOTE: order is important for pop %sp */
5503             gen_pop_update(s, ot);
5504             rm = (modrm & 7) | REX_B(s);
5505             gen_op_mov_reg_v(s, ot, rm, s->T0);
5506         } else {
5507             /* NOTE: order is important too for MMU exceptions */
5508             s->popl_esp_hack = 1 << ot;
5509             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5510             s->popl_esp_hack = 0;
5511             gen_pop_update(s, ot);
5512         }
5513         break;
5514     case 0xc8: /* enter */
5515         {
5516             int level;
5517             val = x86_lduw_code(env, s);
5518             level = x86_ldub_code(env, s);
5519             gen_enter(s, val, level);
5520         }
5521         break;
5522     case 0xc9: /* leave */
5523         gen_leave(s);
5524         break;
5525     case 0x06: /* push es */
5526     case 0x0e: /* push cs */
5527     case 0x16: /* push ss */
5528     case 0x1e: /* push ds */
5529         if (CODE64(s))
5530             goto illegal_op;
5531         gen_op_movl_T0_seg(s, b >> 3);
5532         gen_push_v(s, s->T0);
5533         break;
5534     case 0x1a0: /* push fs */
5535     case 0x1a8: /* push gs */
5536         gen_op_movl_T0_seg(s, (b >> 3) & 7);
5537         gen_push_v(s, s->T0);
5538         break;
5539     case 0x07: /* pop es */
5540     case 0x17: /* pop ss */
5541     case 0x1f: /* pop ds */
5542         if (CODE64(s))
5543             goto illegal_op;
5544         reg = b >> 3;
5545         ot = gen_pop_T0(s);
5546         gen_movl_seg_T0(s, reg);
5547         gen_pop_update(s, ot);
5548         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5549         if (s->base.is_jmp) {
5550             gen_jmp_im(s, s->pc - s->cs_base);
5551             if (reg == R_SS) {
5552                 s->flags &= ~HF_TF_MASK;
5553                 gen_eob_inhibit_irq(s, true);
5554             } else {
5555                 gen_eob(s);
5556             }
5557         }
5558         break;
5559     case 0x1a1: /* pop fs */
5560     case 0x1a9: /* pop gs */
5561         ot = gen_pop_T0(s);
5562         gen_movl_seg_T0(s, (b >> 3) & 7);
5563         gen_pop_update(s, ot);
5564         if (s->base.is_jmp) {
5565             gen_jmp_im(s, s->pc - s->cs_base);
5566             gen_eob(s);
5567         }
5568         break;
5569 
5570         /**************************/
5571         /* mov */
5572     case 0x88: /* mov Eb, Gb */
5573     case 0x89: /* mov Ev, Gv */
5574         ot = mo_b_d(b, dflag);
5575         modrm = x86_ldub_code(env, s);
5576         reg = ((modrm >> 3) & 7) | REX_R(s);
5577 
5578         /* generate a generic store of register 'reg' to the modrm operand */
5579         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5580         break;
5581     case 0xc6:
5582     case 0xc7: /* mov Ev, Iv */
5583         ot = mo_b_d(b, dflag);
5584         modrm = x86_ldub_code(env, s);
5585         mod = (modrm >> 6) & 3;
5586         if (mod != 3) {
5587             s->rip_offset = insn_const_size(ot);
5588             gen_lea_modrm(env, s, modrm);
5589         }
5590         val = insn_get(env, s, ot);
5591         tcg_gen_movi_tl(s->T0, val);
5592         if (mod != 3) {
5593             gen_op_st_v(s, ot, s->T0, s->A0);
5594         } else {
5595             gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
5596         }
5597         break;
5598     case 0x8a: /* mov Gb, Eb */
5599     case 0x8b: /* mov Gv, Ev */
5600         ot = mo_b_d(b, dflag);
5601         modrm = x86_ldub_code(env, s);
5602         reg = ((modrm >> 3) & 7) | REX_R(s);
5603 
5604         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); /* load the modrm operand into T0 */
5605         gen_op_mov_reg_v(s, ot, reg, s->T0); /* then write T0 to register 'reg' */
5606         break;
5607     case 0x8e: /* mov seg, Gv */
5608         modrm = x86_ldub_code(env, s);
5609         reg = (modrm >> 3) & 7;
5610         if (reg >= 6 || reg == R_CS)
5611             goto illegal_op;
5612         gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5613         gen_movl_seg_T0(s, reg);
5614         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5615         if (s->base.is_jmp) {
5616             gen_jmp_im(s, s->pc - s->cs_base);
5617             if (reg == R_SS) {
5618                 s->flags &= ~HF_TF_MASK;
5619                 gen_eob_inhibit_irq(s, true);
5620             } else {
5621                 gen_eob(s);
5622             }
5623         }
5624         break;
5625     case 0x8c: /* mov Gv, seg */
5626         modrm = x86_ldub_code(env, s);
5627         reg = (modrm >> 3) & 7;
5628         mod = (modrm >> 6) & 3;
5629         if (reg >= 6)
5630             goto illegal_op;
5631         gen_op_movl_T0_seg(s, reg);
5632         ot = mod == 3 ? dflag : MO_16;
5633         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5634         break;
5635 
5636     case 0x1b6: /* movzbS Gv, Eb */
5637     case 0x1b7: /* movzwS Gv, Ew */
5638     case 0x1be: /* movsbS Gv, Eb */
5639     case 0x1bf: /* movswS Gv, Ew */
5640         {
5641             MemOp d_ot;
5642             MemOp s_ot;
5643 
5644             /* d_ot is the size of destination */
5645             d_ot = dflag;
5646             /* ot is the size of source: opcode bit 0 selects byte (0) or word (1) */
5647             ot = (b & 1) + MO_8;
5648             /* s_ot is the sign+size of source: opcode bit 3 selects sign extension */
5649             s_ot = b & 8 ? MO_SIGN | ot : ot;
5650 
5651             modrm = x86_ldub_code(env, s);
5652             reg = ((modrm >> 3) & 7) | REX_R(s);
5653             mod = (modrm >> 6) & 3;
5654             rm = (modrm & 7) | REX_B(s);
5655 
5656             if (mod == 3) {
5657                 if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) { /* signed byte from a high-byte register */
5658                     tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8); /* sign-extract bits 8..15 of register rm - 4 */
5659                 } else {
5660                     gen_op_mov_v_reg(s, ot, s->T0, rm);
5661                     switch (s_ot) { /* extend T0 according to source size and signedness */
5662                     case MO_UB:
5663                         tcg_gen_ext8u_tl(s->T0, s->T0);
5664                         break;
5665                     case MO_SB:
5666                         tcg_gen_ext8s_tl(s->T0, s->T0);
5667                         break;
5668                     case MO_UW:
5669                         tcg_gen_ext16u_tl(s->T0, s->T0);
5670                         break;
5671                     default:
5672                     case MO_SW:
5673                         tcg_gen_ext16s_tl(s->T0, s->T0);
5674                         break;
5675                     }
5676                 }
5677                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5678             } else {
5679                 gen_lea_modrm(env, s, modrm);
5680                 gen_op_ld_v(s, s_ot, s->T0, s->A0); /* s_ot carries MO_SIGN; presumably the load itself performs the extension */
5681                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5682             }
5683         }
5684         break;
5685 
5686     case 0x8d: /* lea */
5687         modrm = x86_ldub_code(env, s);
5688         mod = (modrm >> 6) & 3;
5689         if (mod == 3)
5690             goto illegal_op;
5691         reg = ((modrm >> 3) & 7) | REX_R(s);
5692         {
5693             AddressParts a = gen_lea_modrm_0(env, s, modrm);
5694             TCGv ea = gen_lea_modrm_1(s, a);
5695             gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5696             gen_op_mov_reg_v(s, dflag, reg, s->A0);
5697         }
5698         break;
5699 
5700     case 0xa0: /* mov EAX, Ov */
5701     case 0xa1:
5702     case 0xa2: /* mov Ov, EAX */
5703     case 0xa3:
5704         {
5705             target_ulong offset_addr;
5706 
5707             ot = mo_b_d(b, dflag);
5708             switch (s->aflag) {
5709 #ifdef TARGET_X86_64
5710             case MO_64:
5711                 offset_addr = x86_ldq_code(env, s);
5712                 break;
5713 #endif
5714             default:
5715                 offset_addr = insn_get(env, s, s->aflag);
5716                 break;
5717             }
5718             tcg_gen_movi_tl(s->A0, offset_addr);
5719             gen_add_A0_ds_seg(s);
5720             if ((b & 2) == 0) {
5721                 gen_op_ld_v(s, ot, s->T0, s->A0);
5722                 gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
5723             } else {
5724                 gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
5725                 gen_op_st_v(s, ot, s->T0, s->A0);
5726             }
5727         }
5728         break;
5729     case 0xd7: /* xlat */
5730         tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
5731         tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
5732         tcg_gen_add_tl(s->A0, s->A0, s->T0);
5733         gen_extu(s->aflag, s->A0);
5734         gen_add_A0_ds_seg(s);
5735         gen_op_ld_v(s, MO_8, s->T0, s->A0);
5736         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
5737         break;
5738     case 0xb0 ... 0xb7: /* mov R, Ib */
5739         val = insn_get(env, s, MO_8);
5740         tcg_gen_movi_tl(s->T0, val);
5741         gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
5742         break;
5743     case 0xb8 ... 0xbf: /* mov R, Iv */
5744 #ifdef TARGET_X86_64
5745         if (dflag == MO_64) {
5746             uint64_t tmp;
5747             /* 64 bit case */
5748             tmp = x86_ldq_code(env, s);
5749             reg = (b & 7) | REX_B(s);
5750             tcg_gen_movi_tl(s->T0, tmp);
5751             gen_op_mov_reg_v(s, MO_64, reg, s->T0);
5752         } else
5753 #endif
5754         {
5755             ot = dflag;
5756             val = insn_get(env, s, ot);
5757             reg = (b & 7) | REX_B(s);
5758             tcg_gen_movi_tl(s->T0, val);
5759             gen_op_mov_reg_v(s, ot, reg, s->T0);
5760         }
5761         break;
5762 
5763     case 0x91 ... 0x97: /* xchg R, EAX */
5764     do_xchg_reg_eax:
5765         ot = dflag;
5766         reg = (b & 7) | REX_B(s);
5767         rm = R_EAX;
5768         goto do_xchg_reg;
5769     case 0x86:
5770     case 0x87: /* xchg Ev, Gv */
5771         ot = mo_b_d(b, dflag);
5772         modrm = x86_ldub_code(env, s);
5773         reg = ((modrm >> 3) & 7) | REX_R(s);
5774         mod = (modrm >> 6) & 3;
5775         if (mod == 3) {
5776             rm = (modrm & 7) | REX_B(s);
5777         do_xchg_reg:
5778             gen_op_mov_v_reg(s, ot, s->T0, reg);
5779             gen_op_mov_v_reg(s, ot, s->T1, rm);
5780             gen_op_mov_reg_v(s, ot, rm, s->T0);
5781             gen_op_mov_reg_v(s, ot, reg, s->T1);
5782         } else {
5783             gen_lea_modrm(env, s, modrm);
5784             gen_op_mov_v_reg(s, ot, s->T0, reg);
5785             /* for xchg, lock is implicit */
5786             tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
5787                                    s->mem_index, ot | MO_LE);
5788             gen_op_mov_reg_v(s, ot, reg, s->T1);
5789         }
5790         break;
5791     case 0xc4: /* les Gv */
5792         /* In CODE64 this is VEX3; see above.  */
5793         op = R_ES;
5794         goto do_lxx;
5795     case 0xc5: /* lds Gv */
5796         /* In CODE64 this is VEX2; see above.  */
5797         op = R_DS;
5798         goto do_lxx;
5799     case 0x1b2: /* lss Gv */
5800         op = R_SS;
5801         goto do_lxx;
5802     case 0x1b4: /* lfs Gv */
5803         op = R_FS;
5804         goto do_lxx;
5805     case 0x1b5: /* lgs Gv */
5806         op = R_GS;
5807     do_lxx:
5808         ot = dflag != MO_16 ? MO_32 : MO_16;
5809         modrm = x86_ldub_code(env, s);
5810         reg = ((modrm >> 3) & 7) | REX_R(s);
5811         mod = (modrm >> 6) & 3;
5812         if (mod == 3)
5813             goto illegal_op;
5814         gen_lea_modrm(env, s, modrm);
5815         gen_op_ld_v(s, ot, s->T1, s->A0);
5816         gen_add_A0_im(s, 1 << ot);
5817         /* load the segment first to handle exceptions properly */
5818         gen_op_ld_v(s, MO_16, s->T0, s->A0);
5819         gen_movl_seg_T0(s, op);
5820         /* then put the data */
5821         gen_op_mov_reg_v(s, ot, reg, s->T1);
5822         if (s->base.is_jmp) {
5823             gen_jmp_im(s, s->pc - s->cs_base);
5824             gen_eob(s);
5825         }
5826         break;
5827 
5828         /************************/
5829         /* shifts */
5830     case 0xc0:
5831     case 0xc1:
5832         /* shift Ev,Ib */
5833         shift = 2;
5834     grp2:
5835         {
5836             ot = mo_b_d(b, dflag);
5837             modrm = x86_ldub_code(env, s);
5838             mod = (modrm >> 6) & 3;
5839             op = (modrm >> 3) & 7;
5840 
5841             if (mod != 3) {
5842                 if (shift == 2) {
5843                     s->rip_offset = 1;
5844                 }
5845                 gen_lea_modrm(env, s, modrm);
5846                 opreg = OR_TMP0;
5847             } else {
5848                 opreg = (modrm & 7) | REX_B(s);
5849             }
5850 
5851             /* simpler op */
5852             if (shift == 0) {
5853                 gen_shift(s, op, ot, opreg, OR_ECX);
5854             } else {
5855                 if (shift == 2) {
5856                     shift = x86_ldub_code(env, s);
5857                 }
5858                 gen_shifti(s, op, ot, opreg, shift);
5859             }
5860         }
5861         break;
5862     case 0xd0:
5863     case 0xd1:
5864         /* shift Ev,1 */
5865         shift = 1;
5866         goto grp2;
5867     case 0xd2:
5868     case 0xd3:
5869         /* shift Ev,cl */
5870         shift = 0;
5871         goto grp2;
5872 
5873     case 0x1a4: /* shld imm */
5874         op = 0;
5875         shift = 1;
5876         goto do_shiftd;
5877     case 0x1a5: /* shld cl */
5878         op = 0;
5879         shift = 0;
5880         goto do_shiftd;
5881     case 0x1ac: /* shrd imm */
5882         op = 1;
5883         shift = 1;
5884         goto do_shiftd;
5885     case 0x1ad: /* shrd cl */
5886         op = 1;
5887         shift = 0;
5888     do_shiftd:
5889         ot = dflag;
5890         modrm = x86_ldub_code(env, s);
5891         mod = (modrm >> 6) & 3;
5892         rm = (modrm & 7) | REX_B(s);
5893         reg = ((modrm >> 3) & 7) | REX_R(s);
5894         if (mod != 3) {
5895             gen_lea_modrm(env, s, modrm);
5896             opreg = OR_TMP0;
5897         } else {
5898             opreg = rm;
5899         }
5900         gen_op_mov_v_reg(s, ot, s->T1, reg);
5901 
5902         if (shift) {
5903             TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
5904             gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5905             tcg_temp_free(imm);
5906         } else {
5907             gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5908         }
5909         break;
5910 
5911         /************************/
5912         /* floats */
5913     case 0xd8 ... 0xdf:
5914         {
5915             bool update_fip = true;
5916 
5917             if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5918                 /* if CR0.EM or CR0.TS are set, generate an FPU exception */
5919                 /* XXX: what to do if illegal op ? */
5920                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5921                 break;
5922             }
5923             modrm = x86_ldub_code(env, s);
5924             mod = (modrm >> 6) & 3;
5925             rm = modrm & 7;
5926             op = ((b & 7) << 3) | ((modrm >> 3) & 7);
5927             if (mod != 3) {
5928                 /* memory op */
5929                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
5930                 TCGv ea = gen_lea_modrm_1(s, a);
5931                 TCGv last_addr = tcg_temp_new();
5932                 bool update_fdp = true;
5933 
5934                 tcg_gen_mov_tl(last_addr, ea);
5935                 gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
5936 
5937                 switch (op) {
5938                 case 0x00 ... 0x07: /* fxxxs */
5939                 case 0x10 ... 0x17: /* fixxxl */
5940                 case 0x20 ... 0x27: /* fxxxl */
5941                 case 0x30 ... 0x37: /* fixxx */
5942                     {
5943                         int op1;
5944                         op1 = op & 7;
5945 
5946                         switch (op >> 4) {
5947                         case 0:
5948                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5949                                                 s->mem_index, MO_LEUL);
5950                             gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
5951                             break;
5952                         case 1:
5953                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5954                                                 s->mem_index, MO_LEUL);
5955                             gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5956                             break;
5957                         case 2:
5958                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5959                                                 s->mem_index, MO_LEQ);
5960                             gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
5961                             break;
5962                         case 3:
5963                         default:
5964                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5965                                                 s->mem_index, MO_LESW);
5966                             gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5967                             break;
5968                         }
5969 
5970                         gen_helper_fp_arith_ST0_FT0(op1);
5971                         if (op1 == 3) {
5972                             /* fcomp needs pop */
5973                             gen_helper_fpop(cpu_env);
5974                         }
5975                     }
5976                     break;
5977                 case 0x08: /* flds */
5978                 case 0x0a: /* fsts */
5979                 case 0x0b: /* fstps */
5980                 case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5981                 case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5982                 case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5983                     switch (op & 7) {
5984                     case 0:
5985                         switch (op >> 4) {
5986                         case 0:
5987                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5988                                                 s->mem_index, MO_LEUL);
5989                             gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
5990                             break;
5991                         case 1:
5992                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5993                                                 s->mem_index, MO_LEUL);
5994                             gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5995                             break;
5996                         case 2:
5997                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5998                                                 s->mem_index, MO_LEQ);
5999                             gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
6000                             break;
6001                         case 3:
6002                         default:
6003                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6004                                                 s->mem_index, MO_LESW);
6005                             gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
6006                             break;
6007                         }
6008                         break;
6009                     case 1:
6010                         /* XXX: the corresponding CPUID bit must be tested ! */
6011                         switch (op >> 4) {
6012                         case 1:
6013                             gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
6014                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6015                                                 s->mem_index, MO_LEUL);
6016                             break;
6017                         case 2:
6018                             gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
6019                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6020                                                 s->mem_index, MO_LEQ);
6021                             break;
6022                         case 3:
6023                         default:
6024                             gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
6025                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6026                                                 s->mem_index, MO_LEUW);
6027                             break;
6028                         }
6029                         gen_helper_fpop(cpu_env);
6030                         break;
6031                     default:
6032                         switch (op >> 4) {
6033                         case 0:
6034                             gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
6035                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6036                                                 s->mem_index, MO_LEUL);
6037                             break;
6038                         case 1:
6039                             gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
6040                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6041                                                 s->mem_index, MO_LEUL);
6042                             break;
6043                         case 2:
6044                             gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
6045                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6046                                                 s->mem_index, MO_LEQ);
6047                             break;
6048                         case 3:
6049                         default:
6050                             gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
6051                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6052                                                 s->mem_index, MO_LEUW);
6053                             break;
6054                         }
6055                         if ((op & 7) == 3) {
6056                             gen_helper_fpop(cpu_env);
6057                         }
6058                         break;
6059                     }
6060                     break;
6061                 case 0x0c: /* fldenv mem */
6062                     gen_helper_fldenv(cpu_env, s->A0,
6063                                       tcg_const_i32(dflag - 1));
6064                     update_fip = update_fdp = false;
6065                     break;
6066                 case 0x0d: /* fldcw mem */
6067                     tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6068                                         s->mem_index, MO_LEUW);
6069                     gen_helper_fldcw(cpu_env, s->tmp2_i32);
6070                     update_fip = update_fdp = false;
6071                     break;
6072                 case 0x0e: /* fnstenv mem */
6073                     gen_helper_fstenv(cpu_env, s->A0,
6074                                       tcg_const_i32(dflag - 1));
6075                     update_fip = update_fdp = false;
6076                     break;
6077                 case 0x0f: /* fnstcw mem */
6078                     gen_helper_fnstcw(s->tmp2_i32, cpu_env);
6079                     tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6080                                         s->mem_index, MO_LEUW);
6081                     update_fip = update_fdp = false;
6082                     break;
6083                 case 0x1d: /* fldt mem */
6084                     gen_helper_fldt_ST0(cpu_env, s->A0);
6085                     break;
6086                 case 0x1f: /* fstpt mem */
6087                     gen_helper_fstt_ST0(cpu_env, s->A0);
6088                     gen_helper_fpop(cpu_env);
6089                     break;
6090                 case 0x2c: /* frstor mem */
6091                     gen_helper_frstor(cpu_env, s->A0,
6092                                       tcg_const_i32(dflag - 1));
6093                     update_fip = update_fdp = false;
6094                     break;
6095                 case 0x2e: /* fnsave mem */
6096                     gen_helper_fsave(cpu_env, s->A0,
6097                                      tcg_const_i32(dflag - 1));
6098                     update_fip = update_fdp = false;
6099                     break;
6100                 case 0x2f: /* fnstsw mem */
6101                     gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6102                     tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6103                                         s->mem_index, MO_LEUW);
6104                     update_fip = update_fdp = false;
6105                     break;
6106                 case 0x3c: /* fbld */
6107                     gen_helper_fbld_ST0(cpu_env, s->A0);
6108                     break;
6109                 case 0x3e: /* fbstp */
6110                     gen_helper_fbst_ST0(cpu_env, s->A0);
6111                     gen_helper_fpop(cpu_env);
6112                     break;
6113                 case 0x3d: /* fildll */
6114                     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
6115                                         s->mem_index, MO_LEQ);
6116                     gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
6117                     break;
6118                 case 0x3f: /* fistpll */
6119                     gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
6120                     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6121                                         s->mem_index, MO_LEQ);
6122                     gen_helper_fpop(cpu_env);
6123                     break;
6124                 default:
6125                     goto unknown_op;
6126                 }
6127 
6128                 if (update_fdp) {
6129                     int last_seg = s->override >= 0 ? s->override : a.def_seg;
6130 
6131                     tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
6132                                    offsetof(CPUX86State,
6133                                             segs[last_seg].selector));
6134                     tcg_gen_st16_i32(s->tmp2_i32, cpu_env,
6135                                      offsetof(CPUX86State, fpds));
6136                     tcg_gen_st_tl(last_addr, cpu_env,
6137                                   offsetof(CPUX86State, fpdp));
6138                 }
6139                 tcg_temp_free(last_addr);
6140             } else {
6141                 /* register float ops */
6142                 opreg = rm;
6143 
6144                 switch (op) {
6145                 case 0x08: /* fld sti */
6146                     gen_helper_fpush(cpu_env);
6147                     gen_helper_fmov_ST0_STN(cpu_env,
6148                                             tcg_const_i32((opreg + 1) & 7));
6149                     break;
6150                 case 0x09: /* fxchg sti */
6151                 case 0x29: /* fxchg4 sti, undocumented op */
6152                 case 0x39: /* fxchg7 sti, undocumented op */
6153                     gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
6154                     break;
6155                 case 0x0a: /* grp d9/2 */
6156                     switch (rm) {
6157                     case 0: /* fnop */
6158                         /* check exceptions (FreeBSD FPU probe) */
6159                         gen_helper_fwait(cpu_env);
6160                         update_fip = false;
6161                         break;
6162                     default:
6163                         goto unknown_op;
6164                     }
6165                     break;
6166                 case 0x0c: /* grp d9/4 */
6167                     switch (rm) {
6168                     case 0: /* fchs */
6169                         gen_helper_fchs_ST0(cpu_env);
6170                         break;
6171                     case 1: /* fabs */
6172                         gen_helper_fabs_ST0(cpu_env);
6173                         break;
6174                     case 4: /* ftst */
6175                         gen_helper_fldz_FT0(cpu_env);
6176                         gen_helper_fcom_ST0_FT0(cpu_env);
6177                         break;
6178                     case 5: /* fxam */
6179                         gen_helper_fxam_ST0(cpu_env);
6180                         break;
6181                     default:
6182                         goto unknown_op;
6183                     }
6184                     break;
6185                 case 0x0d: /* grp d9/5 */
6186                     {
6187                         switch (rm) {
6188                         case 0:
6189                             gen_helper_fpush(cpu_env);
6190                             gen_helper_fld1_ST0(cpu_env);
6191                             break;
6192                         case 1:
6193                             gen_helper_fpush(cpu_env);
6194                             gen_helper_fldl2t_ST0(cpu_env);
6195                             break;
6196                         case 2:
6197                             gen_helper_fpush(cpu_env);
6198                             gen_helper_fldl2e_ST0(cpu_env);
6199                             break;
6200                         case 3:
6201                             gen_helper_fpush(cpu_env);
6202                             gen_helper_fldpi_ST0(cpu_env);
6203                             break;
6204                         case 4:
6205                             gen_helper_fpush(cpu_env);
6206                             gen_helper_fldlg2_ST0(cpu_env);
6207                             break;
6208                         case 5:
6209                             gen_helper_fpush(cpu_env);
6210                             gen_helper_fldln2_ST0(cpu_env);
6211                             break;
6212                         case 6:
6213                             gen_helper_fpush(cpu_env);
6214                             gen_helper_fldz_ST0(cpu_env);
6215                             break;
6216                         default:
6217                             goto unknown_op;
6218                         }
6219                     }
6220                     break;
6221                 case 0x0e: /* grp d9/6 */
6222                     switch (rm) {
6223                     case 0: /* f2xm1 */
6224                         gen_helper_f2xm1(cpu_env);
6225                         break;
6226                     case 1: /* fyl2x */
6227                         gen_helper_fyl2x(cpu_env);
6228                         break;
6229                     case 2: /* fptan */
6230                         gen_helper_fptan(cpu_env);
6231                         break;
6232                     case 3: /* fpatan */
6233                         gen_helper_fpatan(cpu_env);
6234                         break;
6235                     case 4: /* fxtract */
6236                         gen_helper_fxtract(cpu_env);
6237                         break;
6238                     case 5: /* fprem1 */
6239                         gen_helper_fprem1(cpu_env);
6240                         break;
6241                     case 6: /* fdecstp */
6242                         gen_helper_fdecstp(cpu_env);
6243                         break;
6244                     default:
6245                     case 7: /* fincstp */
6246                         gen_helper_fincstp(cpu_env);
6247                         break;
6248                     }
6249                     break;
6250                 case 0x0f: /* grp d9/7 */
6251                     switch (rm) {
6252                     case 0: /* fprem */
6253                         gen_helper_fprem(cpu_env);
6254                         break;
6255                     case 1: /* fyl2xp1 */
6256                         gen_helper_fyl2xp1(cpu_env);
6257                         break;
6258                     case 2: /* fsqrt */
6259                         gen_helper_fsqrt(cpu_env);
6260                         break;
6261                     case 3: /* fsincos */
6262                         gen_helper_fsincos(cpu_env);
6263                         break;
6264                     case 5: /* fscale */
6265                         gen_helper_fscale(cpu_env);
6266                         break;
6267                     case 4: /* frndint */
6268                         gen_helper_frndint(cpu_env);
6269                         break;
6270                     case 6: /* fsin */
6271                         gen_helper_fsin(cpu_env);
6272                         break;
6273                     default:
6274                     case 7: /* fcos */
6275                         gen_helper_fcos(cpu_env);
6276                         break;
6277                     }
6278                     break;
6279                 case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6280                 case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6281                 case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6282                     {
6283                         int op1;
6284 
6285                         op1 = op & 7;
6286                         if (op >= 0x20) {
6287                             gen_helper_fp_arith_STN_ST0(op1, opreg);
6288                             if (op >= 0x30) {
6289                                 gen_helper_fpop(cpu_env);
6290                             }
6291                         } else {
6292                             gen_helper_fmov_FT0_STN(cpu_env,
6293                                                     tcg_const_i32(opreg));
6294                             gen_helper_fp_arith_ST0_FT0(op1);
6295                         }
6296                     }
6297                     break;
6298                 case 0x02: /* fcom */
6299                 case 0x22: /* fcom2, undocumented op */
6300                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6301                     gen_helper_fcom_ST0_FT0(cpu_env);
6302                     break;
6303                 case 0x03: /* fcomp */
6304                 case 0x23: /* fcomp3, undocumented op */
6305                 case 0x32: /* fcomp5, undocumented op */
6306                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6307                     gen_helper_fcom_ST0_FT0(cpu_env);
6308                     gen_helper_fpop(cpu_env);
6309                     break;
6310                 case 0x15: /* da/5 */
6311                     switch (rm) {
6312                     case 1: /* fucompp */
6313                         gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6314                         gen_helper_fucom_ST0_FT0(cpu_env);
6315                         gen_helper_fpop(cpu_env);
6316                         gen_helper_fpop(cpu_env);
6317                         break;
6318                     default:
6319                         goto unknown_op;
6320                     }
6321                     break;
6322                 case 0x1c:
6323                     switch (rm) {
6324                     case 0: /* feni (287 only, just do nop here) */
6325                         break;
6326                     case 1: /* fdisi (287 only, just do nop here) */
6327                         break;
6328                     case 2: /* fclex */
6329                         gen_helper_fclex(cpu_env);
6330                         update_fip = false;
6331                         break;
6332                     case 3: /* fninit */
6333                         gen_helper_fninit(cpu_env);
6334                         update_fip = false;
6335                         break;
6336                     case 4: /* fsetpm (287 only, just do nop here) */
6337                         break;
6338                     default:
6339                         goto unknown_op;
6340                     }
6341                     break;
6342                 case 0x1d: /* fucomi */
6343                     if (!(s->cpuid_features & CPUID_CMOV)) {
6344                         goto illegal_op;
6345                     }
6346                     gen_update_cc_op(s);
6347                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6348                     gen_helper_fucomi_ST0_FT0(cpu_env);
6349                     set_cc_op(s, CC_OP_EFLAGS);
6350                     break;
6351                 case 0x1e: /* fcomi */
6352                     if (!(s->cpuid_features & CPUID_CMOV)) {
6353                         goto illegal_op;
6354                     }
6355                     gen_update_cc_op(s);
6356                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6357                     gen_helper_fcomi_ST0_FT0(cpu_env);
6358                     set_cc_op(s, CC_OP_EFLAGS);
6359                     break;
6360                 case 0x28: /* ffree sti */
6361                     gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6362                     break;
6363                 case 0x2a: /* fst sti */
6364                     gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6365                     break;
6366                 case 0x2b: /* fstp sti */
6367                 case 0x0b: /* fstp1 sti, undocumented op */
6368                 case 0x3a: /* fstp8 sti, undocumented op */
6369                 case 0x3b: /* fstp9 sti, undocumented op */
6370                     gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6371                     gen_helper_fpop(cpu_env);
6372                     break;
6373                 case 0x2c: /* fucom st(i) */
6374                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6375                     gen_helper_fucom_ST0_FT0(cpu_env);
6376                     break;
6377                 case 0x2d: /* fucomp st(i) */
6378                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6379                     gen_helper_fucom_ST0_FT0(cpu_env);
6380                     gen_helper_fpop(cpu_env);
6381                     break;
6382                 case 0x33: /* de/3 */
6383                     switch (rm) {
6384                     case 1: /* fcompp */
6385                         gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6386                         gen_helper_fcom_ST0_FT0(cpu_env);
6387                         gen_helper_fpop(cpu_env);
6388                         gen_helper_fpop(cpu_env);
6389                         break;
6390                     default:
6391                         goto unknown_op;
6392                     }
6393                     break;
6394                 case 0x38: /* ffreep sti, undocumented op */
6395                     gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6396                     gen_helper_fpop(cpu_env);
6397                     break;
6398                 case 0x3c: /* df/4 */
6399                     switch (rm) {
6400                     case 0:
6401                         gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6402                         tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
6403                         gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
6404                         break;
6405                     default:
6406                         goto unknown_op;
6407                     }
6408                     break;
6409                 case 0x3d: /* fucomip */
6410                     if (!(s->cpuid_features & CPUID_CMOV)) {
6411                         goto illegal_op;
6412                     }
6413                     gen_update_cc_op(s);
6414                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6415                     gen_helper_fucomi_ST0_FT0(cpu_env);
6416                     gen_helper_fpop(cpu_env);
6417                     set_cc_op(s, CC_OP_EFLAGS);
6418                     break;
6419                 case 0x3e: /* fcomip */
6420                     if (!(s->cpuid_features & CPUID_CMOV)) {
6421                         goto illegal_op;
6422                     }
6423                     gen_update_cc_op(s);
6424                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6425                     gen_helper_fcomi_ST0_FT0(cpu_env);
6426                     gen_helper_fpop(cpu_env);
6427                     set_cc_op(s, CC_OP_EFLAGS);
6428                     break;
6429                 case 0x10 ... 0x13: /* fcmovxx */
6430                 case 0x18 ... 0x1b:
6431                     {
6432                         int op1;
6433                         TCGLabel *l1;
6434                         static const uint8_t fcmov_cc[8] = {
6435                             (JCC_B << 1),
6436                             (JCC_Z << 1),
6437                             (JCC_BE << 1),
6438                             (JCC_P << 1),
6439                         };
6440 
6441                         if (!(s->cpuid_features & CPUID_CMOV)) {
6442                             goto illegal_op;
6443                         }
6444                         op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6445                         l1 = gen_new_label();
6446                         gen_jcc1_noeob(s, op1, l1);
6447                         gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6448                         gen_set_label(l1);
6449                     }
6450                     break;
6451                 default:
6452                     goto unknown_op;
6453                 }
6454             }
6455 
6456             if (update_fip) {
6457                 tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
6458                                offsetof(CPUX86State, segs[R_CS].selector));
6459                 tcg_gen_st16_i32(s->tmp2_i32, cpu_env,
6460                                  offsetof(CPUX86State, fpcs));
6461                 tcg_gen_st_tl(tcg_constant_tl(pc_start - s->cs_base),
6462                               cpu_env, offsetof(CPUX86State, fpip));
6463             }
6464         }
6465         break;
6466         /************************/
6467         /* string ops */
6468 
6469     case 0xa4: /* movsS */
6470     case 0xa5:
6471         ot = mo_b_d(b, dflag);
6472         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6473             gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6474         } else {
6475             gen_movs(s, ot);
6476         }
6477         break;
6478 
6479     case 0xaa: /* stosS */
6480     case 0xab:
6481         ot = mo_b_d(b, dflag);
6482         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6483             gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6484         } else {
6485             gen_stos(s, ot);
6486         }
6487         break;
6488     case 0xac: /* lodsS */
6489     case 0xad:
6490         ot = mo_b_d(b, dflag);
6491         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6492             gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6493         } else {
6494             gen_lods(s, ot);
6495         }
6496         break;
6497     case 0xae: /* scasS */
6498     case 0xaf:
6499         ot = mo_b_d(b, dflag);
6500         if (prefixes & PREFIX_REPNZ) {
6501             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6502         } else if (prefixes & PREFIX_REPZ) {
6503             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6504         } else {
6505             gen_scas(s, ot);
6506         }
6507         break;
6508 
6509     case 0xa6: /* cmpsS */
6510     case 0xa7:
6511         ot = mo_b_d(b, dflag);
6512         if (prefixes & PREFIX_REPNZ) {
6513             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6514         } else if (prefixes & PREFIX_REPZ) {
6515             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6516         } else {
6517             gen_cmps(s, ot);
6518         }
6519         break;
6520     case 0x6c: /* insS */
6521     case 0x6d:
6522         ot = mo_b_d32(b, dflag);
6523         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6524         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6525         if (!gen_check_io(s, ot, s->tmp2_i32,
6526                           SVM_IOIO_TYPE_MASK | SVM_IOIO_STR_MASK)) {
6527             break;
6528         }
6529         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6530             gen_io_start();
6531         }
6532         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6533             gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6534             /* jump generated by gen_repz_ins */
6535         } else {
6536             gen_ins(s, ot);
6537             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6538                 gen_jmp(s, s->pc - s->cs_base);
6539             }
6540         }
6541         break;
6542     case 0x6e: /* outsS */
6543     case 0x6f:
6544         ot = mo_b_d32(b, dflag);
6545         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6546         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6547         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_STR_MASK)) {
6548             break;
6549         }
6550         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6551             gen_io_start();
6552         }
6553         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6554             gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6555             /* jump generated by gen_repz_outs */
6556         } else {
6557             gen_outs(s, ot);
6558             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6559                 gen_jmp(s, s->pc - s->cs_base);
6560             }
6561         }
6562         break;
6563 
6564         /************************/
6565         /* port I/O */
6566 
6567     case 0xe4:
6568     case 0xe5:
6569         ot = mo_b_d32(b, dflag);
6570         val = x86_ldub_code(env, s);
6571         tcg_gen_movi_i32(s->tmp2_i32, val);
6572         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6573             break;
6574         }
6575         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6576             gen_io_start();
6577         }
6578         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6579         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6580         gen_bpt_io(s, s->tmp2_i32, ot);
6581         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6582             gen_jmp(s, s->pc - s->cs_base);
6583         }
6584         break;
6585     case 0xe6:
6586     case 0xe7:
6587         ot = mo_b_d32(b, dflag);
6588         val = x86_ldub_code(env, s);
6589         tcg_gen_movi_i32(s->tmp2_i32, val);
6590         if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6591             break;
6592         }
6593         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6594             gen_io_start();
6595         }
6596         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6597         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6598         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6599         gen_bpt_io(s, s->tmp2_i32, ot);
6600         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6601             gen_jmp(s, s->pc - s->cs_base);
6602         }
6603         break;
6604     case 0xec:
6605     case 0xed:
6606         ot = mo_b_d32(b, dflag);
6607         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6608         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6609         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6610             break;
6611         }
6612         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6613             gen_io_start();
6614         }
6615         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6616         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6617         gen_bpt_io(s, s->tmp2_i32, ot);
6618         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6619             gen_jmp(s, s->pc - s->cs_base);
6620         }
6621         break;
6622     case 0xee:
6623     case 0xef:
6624         ot = mo_b_d32(b, dflag);
6625         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6626         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6627         if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6628             break;
6629         }
6630         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6631             gen_io_start();
6632         }
6633         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6634         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6635         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6636         gen_bpt_io(s, s->tmp2_i32, ot);
6637         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6638             gen_jmp(s, s->pc - s->cs_base);
6639         }
6640         break;
6641 
6642         /************************/
6643         /* control */
6644     case 0xc2: /* ret im */
6645         val = x86_ldsw_code(env, s);
6646         ot = gen_pop_T0(s);
6647         gen_stack_update(s, val + (1 << ot));
6648         /* Note that gen_pop_T0 uses a zero-extending load.  */
6649         gen_op_jmp_v(s->T0);
6650         gen_bnd_jmp(s);
6651         gen_jr(s, s->T0);
6652         break;
6653     case 0xc3: /* ret */
6654         ot = gen_pop_T0(s);
6655         gen_pop_update(s, ot);
6656         /* Note that gen_pop_T0 uses a zero-extending load.  */
6657         gen_op_jmp_v(s->T0);
6658         gen_bnd_jmp(s);
6659         gen_jr(s, s->T0);
6660         break;
6661     case 0xca: /* lret im */
6662         val = x86_ldsw_code(env, s);
6663     do_lret:
6664         if (PE(s) && !VM86(s)) {
6665             gen_update_cc_op(s);
6666             gen_jmp_im(s, pc_start - s->cs_base);
6667             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6668                                       tcg_const_i32(val));
6669         } else {
6670             gen_stack_A0(s);
6671             /* pop offset */
6672             gen_op_ld_v(s, dflag, s->T0, s->A0);
6673             /* NOTE: keeping EIP updated is not a problem in case of
6674                exception */
6675             gen_op_jmp_v(s->T0);
6676             /* pop selector */
6677             gen_add_A0_im(s, 1 << dflag);
6678             gen_op_ld_v(s, dflag, s->T0, s->A0);
6679             gen_op_movl_seg_T0_vm(s, R_CS);
6680             /* add stack offset */
6681             gen_stack_update(s, val + (2 << dflag));
6682         }
6683         gen_eob(s);
6684         break;
6685     case 0xcb: /* lret */
6686         val = 0;
6687         goto do_lret;
6688     case 0xcf: /* iret */
6689         gen_svm_check_intercept(s, SVM_EXIT_IRET);
6690         if (!PE(s) || VM86(s)) {
6691             /* real mode or vm86 mode */
6692             if (!check_vm86_iopl(s)) {
6693                 break;
6694             }
6695             gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6696         } else {
6697             gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6698                                       tcg_const_i32(s->pc - s->cs_base));
6699         }
6700         set_cc_op(s, CC_OP_EFLAGS);
6701         gen_eob(s);
6702         break;
6703     case 0xe8: /* call im */
6704         {
6705             if (dflag != MO_16) {
6706                 tval = (int32_t)insn_get(env, s, MO_32);
6707             } else {
6708                 tval = (int16_t)insn_get(env, s, MO_16);
6709             }
6710             next_eip = s->pc - s->cs_base;
6711             tval += next_eip;
6712             if (dflag == MO_16) {
6713                 tval &= 0xffff;
6714             } else if (!CODE64(s)) {
6715                 tval &= 0xffffffff;
6716             }
6717             tcg_gen_movi_tl(s->T0, next_eip);
6718             gen_push_v(s, s->T0);
6719             gen_bnd_jmp(s);
6720             gen_jmp(s, tval);
6721         }
6722         break;
6723     case 0x9a: /* lcall im */
6724         {
6725             unsigned int selector, offset;
6726 
6727             if (CODE64(s))
6728                 goto illegal_op;
6729             ot = dflag;
6730             offset = insn_get(env, s, ot);
6731             selector = insn_get(env, s, MO_16);
6732 
6733             tcg_gen_movi_tl(s->T0, selector);
6734             tcg_gen_movi_tl(s->T1, offset);
6735         }
6736         goto do_lcall;
6737     case 0xe9: /* jmp im */
6738         if (dflag != MO_16) {
6739             tval = (int32_t)insn_get(env, s, MO_32);
6740         } else {
6741             tval = (int16_t)insn_get(env, s, MO_16);
6742         }
6743         tval += s->pc - s->cs_base;
6744         if (dflag == MO_16) {
6745             tval &= 0xffff;
6746         } else if (!CODE64(s)) {
6747             tval &= 0xffffffff;
6748         }
6749         gen_bnd_jmp(s);
6750         gen_jmp(s, tval);
6751         break;
6752     case 0xea: /* ljmp im */
6753         {
6754             unsigned int selector, offset;
6755 
6756             if (CODE64(s))
6757                 goto illegal_op;
6758             ot = dflag;
6759             offset = insn_get(env, s, ot);
6760             selector = insn_get(env, s, MO_16);
6761 
6762             tcg_gen_movi_tl(s->T0, selector);
6763             tcg_gen_movi_tl(s->T1, offset);
6764         }
6765         goto do_ljmp;
6766     case 0xeb: /* jmp Jb */
6767         tval = (int8_t)insn_get(env, s, MO_8);
6768         tval += s->pc - s->cs_base;
6769         if (dflag == MO_16) {
6770             tval &= 0xffff;
6771         }
6772         gen_jmp(s, tval);
6773         break;
6774     case 0x70 ... 0x7f: /* jcc Jb */
6775         tval = (int8_t)insn_get(env, s, MO_8);
6776         goto do_jcc;
6777     case 0x180 ... 0x18f: /* jcc Jv */
6778         if (dflag != MO_16) {
6779             tval = (int32_t)insn_get(env, s, MO_32);
6780         } else {
6781             tval = (int16_t)insn_get(env, s, MO_16);
6782         }
6783     do_jcc:
6784         next_eip = s->pc - s->cs_base;
6785         tval += next_eip;
6786         if (dflag == MO_16) {
6787             tval &= 0xffff;
6788         }
6789         gen_bnd_jmp(s);
6790         gen_jcc(s, b, tval, next_eip);
6791         break;
6792 
6793     case 0x190 ... 0x19f: /* setcc Gv */
6794         modrm = x86_ldub_code(env, s);
6795         gen_setcc1(s, b, s->T0);
6796         gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6797         break;
6798     case 0x140 ... 0x14f: /* cmov Gv, Ev */
6799         if (!(s->cpuid_features & CPUID_CMOV)) {
6800             goto illegal_op;
6801         }
6802         ot = dflag;
6803         modrm = x86_ldub_code(env, s);
6804         reg = ((modrm >> 3) & 7) | REX_R(s);
6805         gen_cmovcc1(env, s, ot, b, modrm, reg);
6806         break;
6807 
6808         /************************/
6809         /* flags */
6810     case 0x9c: /* pushf */
6811         gen_svm_check_intercept(s, SVM_EXIT_PUSHF);
6812         if (check_vm86_iopl(s)) {
6813             gen_update_cc_op(s);
6814             gen_helper_read_eflags(s->T0, cpu_env);
6815             gen_push_v(s, s->T0);
6816         }
6817         break;
6818     case 0x9d: /* popf */
6819         gen_svm_check_intercept(s, SVM_EXIT_POPF);
6820         if (check_vm86_iopl(s)) {
6821             ot = gen_pop_T0(s);
6822             if (CPL(s) == 0) {
6823                 if (dflag != MO_16) {
6824                     gen_helper_write_eflags(cpu_env, s->T0,
6825                                             tcg_const_i32((TF_MASK | AC_MASK |
6826                                                            ID_MASK | NT_MASK |
6827                                                            IF_MASK |
6828                                                            IOPL_MASK)));
6829                 } else {
6830                     gen_helper_write_eflags(cpu_env, s->T0,
6831                                             tcg_const_i32((TF_MASK | AC_MASK |
6832                                                            ID_MASK | NT_MASK |
6833                                                            IF_MASK | IOPL_MASK)
6834                                                           & 0xffff));
6835                 }
6836             } else {
6837                 if (CPL(s) <= IOPL(s)) {
6838                     if (dflag != MO_16) {
6839                         gen_helper_write_eflags(cpu_env, s->T0,
6840                                                 tcg_const_i32((TF_MASK |
6841                                                                AC_MASK |
6842                                                                ID_MASK |
6843                                                                NT_MASK |
6844                                                                IF_MASK)));
6845                     } else {
6846                         gen_helper_write_eflags(cpu_env, s->T0,
6847                                                 tcg_const_i32((TF_MASK |
6848                                                                AC_MASK |
6849                                                                ID_MASK |
6850                                                                NT_MASK |
6851                                                                IF_MASK)
6852                                                               & 0xffff));
6853                     }
6854                 } else {
6855                     if (dflag != MO_16) {
6856                         gen_helper_write_eflags(cpu_env, s->T0,
6857                                            tcg_const_i32((TF_MASK | AC_MASK |
6858                                                           ID_MASK | NT_MASK)));
6859                     } else {
6860                         gen_helper_write_eflags(cpu_env, s->T0,
6861                                            tcg_const_i32((TF_MASK | AC_MASK |
6862                                                           ID_MASK | NT_MASK)
6863                                                          & 0xffff));
6864                     }
6865                 }
6866             }
6867             gen_pop_update(s, ot);
6868             set_cc_op(s, CC_OP_EFLAGS);
6869             /* abort translation because TF/AC flag may change */
6870             gen_jmp_im(s, s->pc - s->cs_base);
6871             gen_eob(s);
6872         }
6873         break;
6874     case 0x9e: /* sahf */
6875         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6876             goto illegal_op;
6877         gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
6878         gen_compute_eflags(s);
6879         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6880         tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6881         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
6882         break;
6883     case 0x9f: /* lahf */
6884         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6885             goto illegal_op;
6886         gen_compute_eflags(s);
6887         /* Note: gen_compute_eflags() only gives the condition codes */
6888         tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
6889         gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
6890         break;
6891     case 0xf5: /* cmc */
6892         gen_compute_eflags(s);
6893         tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6894         break;
6895     case 0xf8: /* clc */
6896         gen_compute_eflags(s);
6897         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6898         break;
6899     case 0xf9: /* stc */
6900         gen_compute_eflags(s);
6901         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6902         break;
6903     case 0xfc: /* cld */
6904         tcg_gen_movi_i32(s->tmp2_i32, 1);
6905         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6906         break;
6907     case 0xfd: /* std */
6908         tcg_gen_movi_i32(s->tmp2_i32, -1);
6909         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6910         break;
6911 
6912         /************************/
6913         /* bit operations */
6914     case 0x1ba: /* bt/bts/btr/btc Gv, im */
6915         ot = dflag;
6916         modrm = x86_ldub_code(env, s);
6917         op = (modrm >> 3) & 7;
6918         mod = (modrm >> 6) & 3;
6919         rm = (modrm & 7) | REX_B(s);
6920         if (mod != 3) {
6921             s->rip_offset = 1;
6922             gen_lea_modrm(env, s, modrm);
6923             if (!(s->prefix & PREFIX_LOCK)) {
6924                 gen_op_ld_v(s, ot, s->T0, s->A0);
6925             }
6926         } else {
6927             gen_op_mov_v_reg(s, ot, s->T0, rm);
6928         }
6929         /* load shift */
6930         val = x86_ldub_code(env, s);
6931         tcg_gen_movi_tl(s->T1, val);
6932         if (op < 4)
6933             goto unknown_op;
6934         op -= 4;
6935         goto bt_op;
6936     case 0x1a3: /* bt Gv, Ev */
6937         op = 0;
6938         goto do_btx;
6939     case 0x1ab: /* bts */
6940         op = 1;
6941         goto do_btx;
6942     case 0x1b3: /* btr */
6943         op = 2;
6944         goto do_btx;
6945     case 0x1bb: /* btc */
6946         op = 3;
6947     do_btx:
6948         ot = dflag;
6949         modrm = x86_ldub_code(env, s);
6950         reg = ((modrm >> 3) & 7) | REX_R(s);
6951         mod = (modrm >> 6) & 3;
6952         rm = (modrm & 7) | REX_B(s);
6953         gen_op_mov_v_reg(s, MO_32, s->T1, reg);
6954         if (mod != 3) {
6955             AddressParts a = gen_lea_modrm_0(env, s, modrm);
6956             /* specific case: we need to add a displacement */
6957             gen_exts(ot, s->T1);
6958             tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
6959             tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
6960             tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
6961             gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
6962             if (!(s->prefix & PREFIX_LOCK)) {
6963                 gen_op_ld_v(s, ot, s->T0, s->A0);
6964             }
6965         } else {
6966             gen_op_mov_v_reg(s, ot, s->T0, rm);
6967         }
6968     bt_op:
6969         tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
6970         tcg_gen_movi_tl(s->tmp0, 1);
6971         tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
6972         if (s->prefix & PREFIX_LOCK) {
6973             switch (op) {
6974             case 0: /* bt */
6975                 /* Needs no atomic ops; we suppressed the normal
6976                    memory load for LOCK above so do it now.  */
6977                 gen_op_ld_v(s, ot, s->T0, s->A0);
6978                 break;
6979             case 1: /* bts */
6980                 tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
6981                                            s->mem_index, ot | MO_LE);
6982                 break;
6983             case 2: /* btr */
6984                 tcg_gen_not_tl(s->tmp0, s->tmp0);
6985                 tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
6986                                             s->mem_index, ot | MO_LE);
6987                 break;
6988             default:
6989             case 3: /* btc */
6990                 tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
6991                                             s->mem_index, ot | MO_LE);
6992                 break;
6993             }
6994             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6995         } else {
6996             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6997             switch (op) {
6998             case 0: /* bt */
6999                 /* Data already loaded; nothing to do.  */
7000                 break;
7001             case 1: /* bts */
7002                 tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
7003                 break;
7004             case 2: /* btr */
7005                 tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
7006                 break;
7007             default:
7008             case 3: /* btc */
7009                 tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
7010                 break;
7011             }
7012             if (op != 0) {
7013                 if (mod != 3) {
7014                     gen_op_st_v(s, ot, s->T0, s->A0);
7015                 } else {
7016                     gen_op_mov_reg_v(s, ot, rm, s->T0);
7017                 }
7018             }
7019         }
7020 
7021         /* Delay all CC updates until after the store above.  Note that
7022            C is the result of the test, Z is unchanged, and the others
7023            are all undefined.  */
7024         switch (s->cc_op) {
7025         case CC_OP_MULB ... CC_OP_MULQ:
7026         case CC_OP_ADDB ... CC_OP_ADDQ:
7027         case CC_OP_ADCB ... CC_OP_ADCQ:
7028         case CC_OP_SUBB ... CC_OP_SUBQ:
7029         case CC_OP_SBBB ... CC_OP_SBBQ:
7030         case CC_OP_LOGICB ... CC_OP_LOGICQ:
7031         case CC_OP_INCB ... CC_OP_INCQ:
7032         case CC_OP_DECB ... CC_OP_DECQ:
7033         case CC_OP_SHLB ... CC_OP_SHLQ:
7034         case CC_OP_SARB ... CC_OP_SARQ:
7035         case CC_OP_BMILGB ... CC_OP_BMILGQ:
7036             /* Z was going to be computed from the non-zero status of CC_DST.
7037                We can get that same Z value (and the new C value) by leaving
7038                CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
7039                same width.  */
7040             tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
7041             set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
7042             break;
7043         default:
7044             /* Otherwise, generate EFLAGS and replace the C bit.  */
7045             gen_compute_eflags(s);
7046             tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
7047                                ctz32(CC_C), 1);
7048             break;
7049         }
7050         break;
7051     case 0x1bc: /* bsf / tzcnt */
7052     case 0x1bd: /* bsr / lzcnt */
7053         ot = dflag;
7054         modrm = x86_ldub_code(env, s);
7055         reg = ((modrm >> 3) & 7) | REX_R(s);
7056         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
7057         gen_extu(ot, s->T0);
7058 
7059         /* Note that lzcnt and tzcnt are in different extensions.  */
7060         if ((prefixes & PREFIX_REPZ)
7061             && (b & 1
7062                 ? s->cpuid_ext3_features & CPUID_EXT3_ABM
7063                 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
7064             int size = 8 << ot;
7065             /* For lzcnt/tzcnt, C bit is defined related to the input. */
7066             tcg_gen_mov_tl(cpu_cc_src, s->T0);
7067             if (b & 1) {
7068                 /* For lzcnt, reduce the target_ulong result by the
7069                    number of zeros that we expect to find at the top.  */
7070                 tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
7071                 tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
7072             } else {
7073                 /* For tzcnt, a zero input must return the operand size.  */
7074                 tcg_gen_ctzi_tl(s->T0, s->T0, size);
7075             }
7076             /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
7077             gen_op_update1_cc(s);
7078             set_cc_op(s, CC_OP_BMILGB + ot);
7079         } else {
7080             /* For bsr/bsf, only the Z bit is defined and it is related
7081                to the input and not the result.  */
7082             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
7083             set_cc_op(s, CC_OP_LOGICB + ot);
7084 
7085             /* ??? The manual says that the output is undefined when the
7086                input is zero, but real hardware leaves it unchanged, and
7087                real programs appear to depend on that.  Accomplish this
7088                by passing the output as the value to return upon zero.  */
7089             if (b & 1) {
7090                 /* For bsr, return the bit index of the first 1 bit,
7091                    not the count of leading zeros.  */
7092                 tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
7093                 tcg_gen_clz_tl(s->T0, s->T0, s->T1);
7094                 tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
7095             } else {
7096                 tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
7097             }
7098         }
7099         gen_op_mov_reg_v(s, ot, reg, s->T0);
7100         break;
7101         /************************/
7102         /* bcd */
7103     case 0x27: /* daa */
7104         if (CODE64(s))
7105             goto illegal_op;
7106         gen_update_cc_op(s);
7107         gen_helper_daa(cpu_env);
7108         set_cc_op(s, CC_OP_EFLAGS);
7109         break;
7110     case 0x2f: /* das */
7111         if (CODE64(s))
7112             goto illegal_op;
7113         gen_update_cc_op(s);
7114         gen_helper_das(cpu_env);
7115         set_cc_op(s, CC_OP_EFLAGS);
7116         break;
7117     case 0x37: /* aaa */
7118         if (CODE64(s))
7119             goto illegal_op;
7120         gen_update_cc_op(s);
7121         gen_helper_aaa(cpu_env);
7122         set_cc_op(s, CC_OP_EFLAGS);
7123         break;
7124     case 0x3f: /* aas */
7125         if (CODE64(s))
7126             goto illegal_op;
7127         gen_update_cc_op(s);
7128         gen_helper_aas(cpu_env);
7129         set_cc_op(s, CC_OP_EFLAGS);
7130         break;
7131     case 0xd4: /* aam */
7132         if (CODE64(s))
7133             goto illegal_op;
7134         val = x86_ldub_code(env, s);
7135         if (val == 0) {
7136             gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
7137         } else {
7138             gen_helper_aam(cpu_env, tcg_const_i32(val));
7139             set_cc_op(s, CC_OP_LOGICB);
7140         }
7141         break;
7142     case 0xd5: /* aad */
7143         if (CODE64(s))
7144             goto illegal_op;
7145         val = x86_ldub_code(env, s);
7146         gen_helper_aad(cpu_env, tcg_const_i32(val));
7147         set_cc_op(s, CC_OP_LOGICB);
7148         break;
7149         /************************/
7150         /* misc */
7151     case 0x90: /* nop */
7152         /* XXX: correct lock test for all insn */
7153         if (prefixes & PREFIX_LOCK) {
7154             goto illegal_op;
7155         }
7156         /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
7157         if (REX_B(s)) {
7158             goto do_xchg_reg_eax;
7159         }
7160         if (prefixes & PREFIX_REPZ) {
7161             gen_update_cc_op(s);
7162             gen_jmp_im(s, pc_start - s->cs_base);
7163             gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
7164             s->base.is_jmp = DISAS_NORETURN;
7165         }
7166         break;
7167     case 0x9b: /* fwait */
7168         if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
7169             (HF_MP_MASK | HF_TS_MASK)) {
7170             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7171         } else {
7172             gen_helper_fwait(cpu_env);
7173         }
7174         break;
7175     case 0xcc: /* int3 */
7176         gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
7177         break;
7178     case 0xcd: /* int N */
7179         val = x86_ldub_code(env, s);
7180         if (check_vm86_iopl(s)) {
7181             gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
7182         }
7183         break;
7184     case 0xce: /* into */
7185         if (CODE64(s))
7186             goto illegal_op;
7187         gen_update_cc_op(s);
7188         gen_jmp_im(s, pc_start - s->cs_base);
7189         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7190         break;
7191 #ifdef WANT_ICEBP
7192     case 0xf1: /* icebp (undocumented, exits to external debugger) */
7193         gen_svm_check_intercept(s, SVM_EXIT_ICEBP);
7194         gen_debug(s);
7195         break;
7196 #endif
7197     case 0xfa: /* cli */
7198         if (check_iopl(s)) {
7199             gen_helper_cli(cpu_env);
7200         }
7201         break;
7202     case 0xfb: /* sti */
7203         if (check_iopl(s)) {
7204             gen_helper_sti(cpu_env);
7205             /* interrupts are enabled only after the first insn following sti */
7206             gen_jmp_im(s, s->pc - s->cs_base);
7207             gen_eob_inhibit_irq(s, true);
7208         }
7209         break;
7210     case 0x62: /* bound */
7211         if (CODE64(s))
7212             goto illegal_op;
7213         ot = dflag;
7214         modrm = x86_ldub_code(env, s);
7215         reg = (modrm >> 3) & 7;
7216         mod = (modrm >> 6) & 3;
7217         if (mod == 3)
7218             goto illegal_op;
7219         gen_op_mov_v_reg(s, ot, s->T0, reg);
7220         gen_lea_modrm(env, s, modrm);
7221         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7222         if (ot == MO_16) {
7223             gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
7224         } else {
7225             gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
7226         }
7227         break;
7228     case 0x1c8 ... 0x1cf: /* bswap reg */
7229         reg = (b & 7) | REX_B(s);
7230 #ifdef TARGET_X86_64
7231         if (dflag == MO_64) {
7232             tcg_gen_bswap64_i64(cpu_regs[reg], cpu_regs[reg]);
7233             break;
7234         }
7235 #endif
7236         tcg_gen_bswap32_tl(cpu_regs[reg], cpu_regs[reg], TCG_BSWAP_OZ);
7237         break;
7238     case 0xd6: /* salc */
7239         if (CODE64(s))
7240             goto illegal_op;
7241         gen_compute_eflags_c(s, s->T0);
7242         tcg_gen_neg_tl(s->T0, s->T0);
7243         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
7244         break;
7245     case 0xe0: /* loopnz */
7246     case 0xe1: /* loopz */
7247     case 0xe2: /* loop */
7248     case 0xe3: /* jecxz */
7249         {
7250             TCGLabel *l1, *l2, *l3;
7251 
7252             tval = (int8_t)insn_get(env, s, MO_8);
7253             next_eip = s->pc - s->cs_base;
7254             tval += next_eip;
7255             if (dflag == MO_16) {
7256                 tval &= 0xffff;
7257             }
7258 
7259             l1 = gen_new_label();
7260             l2 = gen_new_label();
7261             l3 = gen_new_label();
7262             gen_update_cc_op(s);
7263             b &= 3;
7264             switch(b) {
7265             case 0: /* loopnz */
7266             case 1: /* loopz */
7267                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7268                 gen_op_jz_ecx(s, s->aflag, l3);
7269                 gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7270                 break;
7271             case 2: /* loop */
7272                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7273                 gen_op_jnz_ecx(s, s->aflag, l1);
7274                 break;
7275             default:
7276             case 3: /* jcxz */
7277                 gen_op_jz_ecx(s, s->aflag, l1);
7278                 break;
7279             }
7280 
7281             gen_set_label(l3);
7282             gen_jmp_im(s, next_eip);
7283             tcg_gen_br(l2);
7284 
7285             gen_set_label(l1);
7286             gen_jmp_im(s, tval);
7287             gen_set_label(l2);
7288             gen_eob(s);
7289         }
7290         break;
7291     case 0x130: /* wrmsr */
7292     case 0x132: /* rdmsr */
7293         if (check_cpl0(s)) {
7294             gen_update_cc_op(s);
7295             gen_jmp_im(s, pc_start - s->cs_base);
7296             if (b & 2) {
7297                 gen_helper_rdmsr(cpu_env);
7298             } else {
7299                 gen_helper_wrmsr(cpu_env);
7300                 gen_jmp_im(s, s->pc - s->cs_base);
7301                 gen_eob(s);
7302             }
7303         }
7304         break;
7305     case 0x131: /* rdtsc */
7306         gen_update_cc_op(s);
7307         gen_jmp_im(s, pc_start - s->cs_base);
7308         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7309             gen_io_start();
7310         }
7311         gen_helper_rdtsc(cpu_env);
7312         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7313             gen_jmp(s, s->pc - s->cs_base);
7314         }
7315         break;
7316     case 0x133: /* rdpmc */
7317         gen_update_cc_op(s);
7318         gen_jmp_im(s, pc_start - s->cs_base);
7319         gen_helper_rdpmc(cpu_env);
7320         s->base.is_jmp = DISAS_NORETURN;
7321         break;
7322     case 0x134: /* sysenter */
7323         /* For Intel SYSENTER is valid on 64-bit */
7324         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7325             goto illegal_op;
7326         if (!PE(s)) {
7327             gen_exception_gpf(s);
7328         } else {
7329             gen_helper_sysenter(cpu_env);
7330             gen_eob(s);
7331         }
7332         break;
7333     case 0x135: /* sysexit */
7334         /* For Intel SYSEXIT is valid on 64-bit */
7335         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7336             goto illegal_op;
7337         if (!PE(s)) {
7338             gen_exception_gpf(s);
7339         } else {
7340             gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7341             gen_eob(s);
7342         }
7343         break;
7344 #ifdef TARGET_X86_64
7345     case 0x105: /* syscall */
7346         /* XXX: is it usable in real mode ? */
7347         gen_update_cc_op(s);
7348         gen_jmp_im(s, pc_start - s->cs_base);
7349         gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7350         /* TF handling for the syscall insn is different. The TF bit is checked
7351            after the syscall insn completes. This allows #DB to not be
7352            generated after one has entered CPL0 if TF is set in FMASK.  */
7353         gen_eob_worker(s, false, true);
7354         break;
7355     case 0x107: /* sysret */
7356         if (!PE(s)) {
7357             gen_exception_gpf(s);
7358         } else {
7359             gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7360             /* condition codes are modified only in long mode */
7361             if (LMA(s)) {
7362                 set_cc_op(s, CC_OP_EFLAGS);
7363             }
7364             /* TF handling for the sysret insn is different. The TF bit is
7365                checked after the sysret insn completes. This allows #DB to be
7366                generated "as if" the syscall insn in userspace has just
7367                completed.  */
7368             gen_eob_worker(s, false, true);
7369         }
7370         break;
7371 #endif
7372     case 0x1a2: /* cpuid */
7373         gen_update_cc_op(s);
7374         gen_jmp_im(s, pc_start - s->cs_base);
7375         gen_helper_cpuid(cpu_env);
7376         break;
7377     case 0xf4: /* hlt */
7378         if (check_cpl0(s)) {
7379             gen_update_cc_op(s);
7380             gen_jmp_im(s, pc_start - s->cs_base);
7381             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7382             s->base.is_jmp = DISAS_NORETURN;
7383         }
7384         break;
7385     case 0x100:
7386         modrm = x86_ldub_code(env, s);
7387         mod = (modrm >> 6) & 3;
7388         op = (modrm >> 3) & 7;
7389         switch(op) {
7390         case 0: /* sldt */
7391             if (!PE(s) || VM86(s))
7392                 goto illegal_op;
7393             gen_svm_check_intercept(s, SVM_EXIT_LDTR_READ);
7394             tcg_gen_ld32u_tl(s->T0, cpu_env,
7395                              offsetof(CPUX86State, ldt.selector));
7396             ot = mod == 3 ? dflag : MO_16;
7397             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7398             break;
7399         case 2: /* lldt */
7400             if (!PE(s) || VM86(s))
7401                 goto illegal_op;
7402             if (check_cpl0(s)) {
7403                 gen_svm_check_intercept(s, SVM_EXIT_LDTR_WRITE);
7404                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7405                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7406                 gen_helper_lldt(cpu_env, s->tmp2_i32);
7407             }
7408             break;
7409         case 1: /* str */
7410             if (!PE(s) || VM86(s))
7411                 goto illegal_op;
7412             gen_svm_check_intercept(s, SVM_EXIT_TR_READ);
7413             tcg_gen_ld32u_tl(s->T0, cpu_env,
7414                              offsetof(CPUX86State, tr.selector));
7415             ot = mod == 3 ? dflag : MO_16;
7416             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7417             break;
7418         case 3: /* ltr */
7419             if (!PE(s) || VM86(s))
7420                 goto illegal_op;
7421             if (check_cpl0(s)) {
7422                 gen_svm_check_intercept(s, SVM_EXIT_TR_WRITE);
7423                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7424                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7425                 gen_helper_ltr(cpu_env, s->tmp2_i32);
7426             }
7427             break;
7428         case 4: /* verr */
7429         case 5: /* verw */
7430             if (!PE(s) || VM86(s))
7431                 goto illegal_op;
7432             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7433             gen_update_cc_op(s);
7434             if (op == 4) {
7435                 gen_helper_verr(cpu_env, s->T0);
7436             } else {
7437                 gen_helper_verw(cpu_env, s->T0);
7438             }
7439             set_cc_op(s, CC_OP_EFLAGS);
7440             break;
7441         default:
7442             goto unknown_op;
7443         }
7444         break;
7445 
7446     case 0x101:
7447         modrm = x86_ldub_code(env, s);
7448         switch (modrm) {
7449         CASE_MODRM_MEM_OP(0): /* sgdt */
7450             gen_svm_check_intercept(s, SVM_EXIT_GDTR_READ);
7451             gen_lea_modrm(env, s, modrm);
7452             tcg_gen_ld32u_tl(s->T0,
7453                              cpu_env, offsetof(CPUX86State, gdt.limit));
7454             gen_op_st_v(s, MO_16, s->T0, s->A0);
7455             gen_add_A0_im(s, 2);
7456             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7457             if (dflag == MO_16) {
7458                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7459             }
7460             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7461             break;
7462 
7463         case 0xc8: /* monitor */
7464             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7465                 goto illegal_op;
7466             }
7467             gen_update_cc_op(s);
7468             gen_jmp_im(s, pc_start - s->cs_base);
7469             tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7470             gen_extu(s->aflag, s->A0);
7471             gen_add_A0_ds_seg(s);
7472             gen_helper_monitor(cpu_env, s->A0);
7473             break;
7474 
7475         case 0xc9: /* mwait */
7476             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7477                 goto illegal_op;
7478             }
7479             gen_update_cc_op(s);
7480             gen_jmp_im(s, pc_start - s->cs_base);
7481             gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7482             s->base.is_jmp = DISAS_NORETURN;
7483             break;
7484 
7485         case 0xca: /* clac */
7486             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7487                 || CPL(s) != 0) {
7488                 goto illegal_op;
7489             }
7490             gen_helper_clac(cpu_env);
7491             gen_jmp_im(s, s->pc - s->cs_base);
7492             gen_eob(s);
7493             break;
7494 
7495         case 0xcb: /* stac */
7496             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7497                 || CPL(s) != 0) {
7498                 goto illegal_op;
7499             }
7500             gen_helper_stac(cpu_env);
7501             gen_jmp_im(s, s->pc - s->cs_base);
7502             gen_eob(s);
7503             break;
7504 
7505         CASE_MODRM_MEM_OP(1): /* sidt */
7506             gen_svm_check_intercept(s, SVM_EXIT_IDTR_READ);
7507             gen_lea_modrm(env, s, modrm);
7508             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
7509             gen_op_st_v(s, MO_16, s->T0, s->A0);
7510             gen_add_A0_im(s, 2);
7511             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7512             if (dflag == MO_16) {
7513                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7514             }
7515             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7516             break;
7517 
7518         case 0xd0: /* xgetbv */
7519             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7520                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7521                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7522                 goto illegal_op;
7523             }
7524             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7525             gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
7526             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7527             break;
7528 
7529         case 0xd1: /* xsetbv */
7530             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7531                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7532                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7533                 goto illegal_op;
7534             }
7535             if (!check_cpl0(s)) {
7536                 break;
7537             }
7538             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7539                                   cpu_regs[R_EDX]);
7540             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7541             gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
7542             /* End TB because translation flags may change.  */
7543             gen_jmp_im(s, s->pc - s->cs_base);
7544             gen_eob(s);
7545             break;
7546 
7547         case 0xd8: /* VMRUN */
7548             if (!SVME(s) || !PE(s)) {
7549                 goto illegal_op;
7550             }
7551             if (!check_cpl0(s)) {
7552                 break;
7553             }
7554             gen_update_cc_op(s);
7555             gen_jmp_im(s, pc_start - s->cs_base);
7556             gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7557                              tcg_const_i32(s->pc - pc_start));
7558             tcg_gen_exit_tb(NULL, 0);
7559             s->base.is_jmp = DISAS_NORETURN;
7560             break;
7561 
7562         case 0xd9: /* VMMCALL */
7563             if (!SVME(s)) {
7564                 goto illegal_op;
7565             }
7566             gen_update_cc_op(s);
7567             gen_jmp_im(s, pc_start - s->cs_base);
7568             gen_helper_vmmcall(cpu_env);
7569             break;
7570 
7571         case 0xda: /* VMLOAD */
7572             if (!SVME(s) || !PE(s)) {
7573                 goto illegal_op;
7574             }
7575             if (!check_cpl0(s)) {
7576                 break;
7577             }
7578             gen_update_cc_op(s);
7579             gen_jmp_im(s, pc_start - s->cs_base);
7580             gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7581             break;
7582 
7583         case 0xdb: /* VMSAVE */
7584             if (!SVME(s) || !PE(s)) {
7585                 goto illegal_op;
7586             }
7587             if (!check_cpl0(s)) {
7588                 break;
7589             }
7590             gen_update_cc_op(s);
7591             gen_jmp_im(s, pc_start - s->cs_base);
7592             gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7593             break;
7594 
7595         case 0xdc: /* STGI */
7596             if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7597                 || !PE(s)) {
7598                 goto illegal_op;
7599             }
7600             if (!check_cpl0(s)) {
7601                 break;
7602             }
7603             gen_update_cc_op(s);
7604             gen_helper_stgi(cpu_env);
7605             gen_jmp_im(s, s->pc - s->cs_base);
7606             gen_eob(s);
7607             break;
7608 
7609         case 0xdd: /* CLGI */
7610             if (!SVME(s) || !PE(s)) {
7611                 goto illegal_op;
7612             }
7613             if (!check_cpl0(s)) {
7614                 break;
7615             }
7616             gen_update_cc_op(s);
7617             gen_jmp_im(s, pc_start - s->cs_base);
7618             gen_helper_clgi(cpu_env);
7619             break;
7620 
7621         case 0xde: /* SKINIT */
7622             if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7623                 || !PE(s)) {
7624                 goto illegal_op;
7625             }
7626             gen_svm_check_intercept(s, SVM_EXIT_SKINIT);
7627             /* If not intercepted, not implemented -- raise #UD. */
7628             goto illegal_op;
7629 
7630         case 0xdf: /* INVLPGA */
7631             if (!SVME(s) || !PE(s)) {
7632                 goto illegal_op;
7633             }
7634             if (!check_cpl0(s)) {
7635                 break;
7636             }
7637             gen_svm_check_intercept(s, SVM_EXIT_INVLPGA);
7638             if (s->aflag == MO_64) {
7639                 tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7640             } else {
7641                 tcg_gen_ext32u_tl(s->A0, cpu_regs[R_EAX]);
7642             }
7643             gen_helper_flush_page(cpu_env, s->A0);
7644             gen_jmp_im(s, s->pc - s->cs_base);
7645             gen_eob(s);
7646             break;
7647 
7648         CASE_MODRM_MEM_OP(2): /* lgdt */
7649             if (!check_cpl0(s)) {
7650                 break;
7651             }
7652             gen_svm_check_intercept(s, SVM_EXIT_GDTR_WRITE);
7653             gen_lea_modrm(env, s, modrm);
7654             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7655             gen_add_A0_im(s, 2);
7656             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7657             if (dflag == MO_16) {
7658                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7659             }
7660             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7661             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7662             break;
7663 
7664         CASE_MODRM_MEM_OP(3): /* lidt */
7665             if (!check_cpl0(s)) {
7666                 break;
7667             }
7668             gen_svm_check_intercept(s, SVM_EXIT_IDTR_WRITE);
7669             gen_lea_modrm(env, s, modrm);
7670             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7671             gen_add_A0_im(s, 2);
7672             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7673             if (dflag == MO_16) {
7674                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7675             }
7676             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7677             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
7678             break;
7679 
7680         CASE_MODRM_OP(4): /* smsw */
7681             gen_svm_check_intercept(s, SVM_EXIT_READ_CR0);
7682             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
7683             /*
7684              * In 32-bit mode, the higher 16 bits of the destination
7685              * register are undefined.  In practice CR0[31:0] is stored
7686              * just like in 64-bit mode.
7687              */
7688             mod = (modrm >> 6) & 3;
7689             ot = (mod != 3 ? MO_16 : s->dflag);
7690             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7691             break;
7692         case 0xee: /* rdpkru */
7693             if (prefixes & PREFIX_LOCK) {
7694                 goto illegal_op;
7695             }
7696             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7697             gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
7698             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7699             break;
7700         case 0xef: /* wrpkru */
7701             if (prefixes & PREFIX_LOCK) {
7702                 goto illegal_op;
7703             }
7704             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7705                                   cpu_regs[R_EDX]);
7706             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7707             gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
7708             break;
7709 
7710         CASE_MODRM_OP(6): /* lmsw */
7711             if (!check_cpl0(s)) {
7712                 break;
7713             }
7714             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
7715             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7716             /*
7717              * Only the 4 lower bits of CR0 are modified.
7718              * PE cannot be set to zero if already set to one.
7719              */
7720             tcg_gen_ld_tl(s->T1, cpu_env, offsetof(CPUX86State, cr[0]));
7721             tcg_gen_andi_tl(s->T0, s->T0, 0xf);
7722             tcg_gen_andi_tl(s->T1, s->T1, ~0xe);
7723             tcg_gen_or_tl(s->T0, s->T0, s->T1);
7724             gen_helper_write_crN(cpu_env, tcg_constant_i32(0), s->T0);
7725             gen_jmp_im(s, s->pc - s->cs_base);
7726             gen_eob(s);
7727             break;
7728 
7729         CASE_MODRM_MEM_OP(7): /* invlpg */
7730             if (!check_cpl0(s)) {
7731                 break;
7732             }
7733             gen_svm_check_intercept(s, SVM_EXIT_INVLPG);
7734             gen_lea_modrm(env, s, modrm);
7735             gen_helper_flush_page(cpu_env, s->A0);
7736             gen_jmp_im(s, s->pc - s->cs_base);
7737             gen_eob(s);
7738             break;
7739 
7740         case 0xf8: /* swapgs */
7741 #ifdef TARGET_X86_64
7742             if (CODE64(s)) {
7743                 if (check_cpl0(s)) {
7744                     tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
7745                     tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7746                                   offsetof(CPUX86State, kernelgsbase));
7747                     tcg_gen_st_tl(s->T0, cpu_env,
7748                                   offsetof(CPUX86State, kernelgsbase));
7749                 }
7750                 break;
7751             }
7752 #endif
7753             goto illegal_op;
7754 
7755         case 0xf9: /* rdtscp */
7756             if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7757                 goto illegal_op;
7758             }
7759             gen_update_cc_op(s);
7760             gen_jmp_im(s, pc_start - s->cs_base);
7761             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7762                 gen_io_start();
7763             }
7764             gen_helper_rdtscp(cpu_env);
7765             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7766                 gen_jmp(s, s->pc - s->cs_base);
7767             }
7768             break;
7769 
7770         default:
7771             goto unknown_op;
7772         }
7773         break;
7774 
7775     case 0x108: /* invd */
7776     case 0x109: /* wbinvd */
7777         if (check_cpl0(s)) {
7778             gen_svm_check_intercept(s, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7779             /* nothing to do */
7780         }
7781         break;
7782     case 0x63: /* arpl or movslS (x86_64) */
7783 #ifdef TARGET_X86_64
7784         if (CODE64(s)) {
7785             int d_ot;
7786             /* d_ot is the size of destination */
7787             d_ot = dflag;
7788 
7789             modrm = x86_ldub_code(env, s);
7790             reg = ((modrm >> 3) & 7) | REX_R(s);
7791             mod = (modrm >> 6) & 3;
7792             rm = (modrm & 7) | REX_B(s);
7793 
7794             if (mod == 3) {
7795                 gen_op_mov_v_reg(s, MO_32, s->T0, rm);
7796                 /* sign extend */
7797                 if (d_ot == MO_64) {
7798                     tcg_gen_ext32s_tl(s->T0, s->T0);
7799                 }
7800                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7801             } else {
7802                 gen_lea_modrm(env, s, modrm);
7803                 gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
7804                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7805             }
7806         } else
7807 #endif
7808         {
7809             TCGLabel *label1;
7810             TCGv t0, t1, t2, a0;
7811 
7812             if (!PE(s) || VM86(s))
7813                 goto illegal_op;
7814             t0 = tcg_temp_local_new();
7815             t1 = tcg_temp_local_new();
7816             t2 = tcg_temp_local_new();
7817             ot = MO_16;
7818             modrm = x86_ldub_code(env, s);
7819             reg = (modrm >> 3) & 7;
7820             mod = (modrm >> 6) & 3;
7821             rm = modrm & 7;
7822             if (mod != 3) {
7823                 gen_lea_modrm(env, s, modrm);
7824                 gen_op_ld_v(s, ot, t0, s->A0);
7825                 a0 = tcg_temp_local_new();
7826                 tcg_gen_mov_tl(a0, s->A0);
7827             } else {
7828                 gen_op_mov_v_reg(s, ot, t0, rm);
7829                 a0 = NULL;
7830             }
7831             gen_op_mov_v_reg(s, ot, t1, reg);
7832             tcg_gen_andi_tl(s->tmp0, t0, 3);
7833             tcg_gen_andi_tl(t1, t1, 3);
7834             tcg_gen_movi_tl(t2, 0);
7835             label1 = gen_new_label();
7836             tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
7837             tcg_gen_andi_tl(t0, t0, ~3);
7838             tcg_gen_or_tl(t0, t0, t1);
7839             tcg_gen_movi_tl(t2, CC_Z);
7840             gen_set_label(label1);
7841             if (mod != 3) {
7842                 gen_op_st_v(s, ot, t0, a0);
7843                 tcg_temp_free(a0);
7844            } else {
7845                 gen_op_mov_reg_v(s, ot, rm, t0);
7846             }
7847             gen_compute_eflags(s);
7848             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7849             tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7850             tcg_temp_free(t0);
7851             tcg_temp_free(t1);
7852             tcg_temp_free(t2);
7853         }
7854         break;
7855     case 0x102: /* lar */
7856     case 0x103: /* lsl */
7857         {
7858             TCGLabel *label1;
7859             TCGv t0;
7860             if (!PE(s) || VM86(s))
7861                 goto illegal_op;
7862             ot = dflag != MO_16 ? MO_32 : MO_16;
7863             modrm = x86_ldub_code(env, s);
7864             reg = ((modrm >> 3) & 7) | REX_R(s);
7865             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7866             t0 = tcg_temp_local_new();
7867             gen_update_cc_op(s);
7868             if (b == 0x102) {
7869                 gen_helper_lar(t0, cpu_env, s->T0);
7870             } else {
7871                 gen_helper_lsl(t0, cpu_env, s->T0);
7872             }
7873             tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
7874             label1 = gen_new_label();
7875             tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
7876             gen_op_mov_reg_v(s, ot, reg, t0);
7877             gen_set_label(label1);
7878             set_cc_op(s, CC_OP_EFLAGS);
7879             tcg_temp_free(t0);
7880         }
7881         break;
7882     case 0x118:
7883         modrm = x86_ldub_code(env, s);
7884         mod = (modrm >> 6) & 3;
7885         op = (modrm >> 3) & 7;
7886         switch(op) {
7887         case 0: /* prefetchnta */
7888         case 1: /* prefetcht0 */
7889         case 2: /* prefetcht1 */
7890         case 3: /* prefetcht2 */
7891             if (mod == 3)
7892                 goto illegal_op;
7893             gen_nop_modrm(env, s, modrm);
7894             /* nothing more to do */
7895             break;
7896         default: /* nop (multi byte) */
7897             gen_nop_modrm(env, s, modrm);
7898             break;
7899         }
7900         break;
7901     case 0x11a:
7902         modrm = x86_ldub_code(env, s);
7903         if (s->flags & HF_MPX_EN_MASK) {
7904             mod = (modrm >> 6) & 3;
7905             reg = ((modrm >> 3) & 7) | REX_R(s);
7906             if (prefixes & PREFIX_REPZ) {
7907                 /* bndcl */
7908                 if (reg >= 4
7909                     || (prefixes & PREFIX_LOCK)
7910                     || s->aflag == MO_16) {
7911                     goto illegal_op;
7912                 }
7913                 gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7914             } else if (prefixes & PREFIX_REPNZ) {
7915                 /* bndcu */
7916                 if (reg >= 4
7917                     || (prefixes & PREFIX_LOCK)
7918                     || s->aflag == MO_16) {
7919                     goto illegal_op;
7920                 }
7921                 TCGv_i64 notu = tcg_temp_new_i64();
7922                 tcg_gen_not_i64(notu, cpu_bndu[reg]);
7923                 gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7924                 tcg_temp_free_i64(notu);
7925             } else if (prefixes & PREFIX_DATA) {
7926                 /* bndmov -- from reg/mem */
7927                 if (reg >= 4 || s->aflag == MO_16) {
7928                     goto illegal_op;
7929                 }
7930                 if (mod == 3) {
7931                     int reg2 = (modrm & 7) | REX_B(s);
7932                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7933                         goto illegal_op;
7934                     }
7935                     if (s->flags & HF_MPX_IU_MASK) {
7936                         tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7937                         tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7938                     }
7939                 } else {
7940                     gen_lea_modrm(env, s, modrm);
7941                     if (CODE64(s)) {
7942                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7943                                             s->mem_index, MO_LEQ);
7944                         tcg_gen_addi_tl(s->A0, s->A0, 8);
7945                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7946                                             s->mem_index, MO_LEQ);
7947                     } else {
7948                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7949                                             s->mem_index, MO_LEUL);
7950                         tcg_gen_addi_tl(s->A0, s->A0, 4);
7951                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7952                                             s->mem_index, MO_LEUL);
7953                     }
7954                     /* bnd registers are now in-use */
7955                     gen_set_hflag(s, HF_MPX_IU_MASK);
7956                 }
7957             } else if (mod != 3) {
7958                 /* bndldx */
7959                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7960                 if (reg >= 4
7961                     || (prefixes & PREFIX_LOCK)
7962                     || s->aflag == MO_16
7963                     || a.base < -1) {
7964                     goto illegal_op;
7965                 }
7966                 if (a.base >= 0) {
7967                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7968                 } else {
7969                     tcg_gen_movi_tl(s->A0, 0);
7970                 }
7971                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7972                 if (a.index >= 0) {
7973                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7974                 } else {
7975                     tcg_gen_movi_tl(s->T0, 0);
7976                 }
7977                 if (CODE64(s)) {
7978                     gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
7979                     tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7980                                    offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7981                 } else {
7982                     gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
7983                     tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7984                     tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7985                 }
7986                 gen_set_hflag(s, HF_MPX_IU_MASK);
7987             }
7988         }
7989         gen_nop_modrm(env, s, modrm);
7990         break;
7991     case 0x11b:
7992         modrm = x86_ldub_code(env, s);
7993         if (s->flags & HF_MPX_EN_MASK) {
7994             mod = (modrm >> 6) & 3;
7995             reg = ((modrm >> 3) & 7) | REX_R(s);
7996             if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7997                 /* bndmk */
7998                 if (reg >= 4
7999                     || (prefixes & PREFIX_LOCK)
8000                     || s->aflag == MO_16) {
8001                     goto illegal_op;
8002                 }
8003                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
8004                 if (a.base >= 0) {
8005                     tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
8006                     if (!CODE64(s)) {
8007                         tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
8008                     }
8009                 } else if (a.base == -1) {
8010                     /* without a base register, the lower bound is 0 */
8011                     tcg_gen_movi_i64(cpu_bndl[reg], 0);
8012                 } else {
8013                     /* rip-relative generates #ud */
8014                     goto illegal_op;
8015                 }
8016                 tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
8017                 if (!CODE64(s)) {
8018                     tcg_gen_ext32u_tl(s->A0, s->A0);
8019                 }
8020                 tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
8021                 /* bnd registers are now in-use */
8022                 gen_set_hflag(s, HF_MPX_IU_MASK);
8023                 break;
8024             } else if (prefixes & PREFIX_REPNZ) {
8025                 /* bndcn */
8026                 if (reg >= 4
8027                     || (prefixes & PREFIX_LOCK)
8028                     || s->aflag == MO_16) {
8029                     goto illegal_op;
8030                 }
8031                 gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
8032             } else if (prefixes & PREFIX_DATA) {
8033                 /* bndmov -- to reg/mem */
8034                 if (reg >= 4 || s->aflag == MO_16) {
8035                     goto illegal_op;
8036                 }
8037                 if (mod == 3) {
8038                     int reg2 = (modrm & 7) | REX_B(s);
8039                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
8040                         goto illegal_op;
8041                     }
8042                     if (s->flags & HF_MPX_IU_MASK) {
8043                         tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
8044                         tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
8045                     }
8046                 } else {
8047                     gen_lea_modrm(env, s, modrm);
8048                     if (CODE64(s)) {
8049                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8050                                             s->mem_index, MO_LEQ);
8051                         tcg_gen_addi_tl(s->A0, s->A0, 8);
8052                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8053                                             s->mem_index, MO_LEQ);
8054                     } else {
8055                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8056                                             s->mem_index, MO_LEUL);
8057                         tcg_gen_addi_tl(s->A0, s->A0, 4);
8058                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8059                                             s->mem_index, MO_LEUL);
8060                     }
8061                 }
8062             } else if (mod != 3) {
8063                 /* bndstx */
8064                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
8065                 if (reg >= 4
8066                     || (prefixes & PREFIX_LOCK)
8067                     || s->aflag == MO_16
8068                     || a.base < -1) {
8069                     goto illegal_op;
8070                 }
8071                 if (a.base >= 0) {
8072                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
8073                 } else {
8074                     tcg_gen_movi_tl(s->A0, 0);
8075                 }
8076                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
8077                 if (a.index >= 0) {
8078                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
8079                 } else {
8080                     tcg_gen_movi_tl(s->T0, 0);
8081                 }
8082                 if (CODE64(s)) {
8083                     gen_helper_bndstx64(cpu_env, s->A0, s->T0,
8084                                         cpu_bndl[reg], cpu_bndu[reg]);
8085                 } else {
8086                     gen_helper_bndstx32(cpu_env, s->A0, s->T0,
8087                                         cpu_bndl[reg], cpu_bndu[reg]);
8088                 }
8089             }
8090         }
8091         gen_nop_modrm(env, s, modrm);
8092         break;
8093     case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
8094         modrm = x86_ldub_code(env, s);
8095         gen_nop_modrm(env, s, modrm);
8096         break;
8097 
8098     case 0x120: /* mov reg, crN */
8099     case 0x122: /* mov crN, reg */
8100         if (!check_cpl0(s)) {
8101             break;
8102         }
8103         modrm = x86_ldub_code(env, s);
8104         /*
8105          * Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8106          * AMD documentation (24594.pdf) and testing of Intel 386 and 486
8107          * processors all show that the mod bits are assumed to be 1's,
8108          * regardless of actual values.
8109          */
8110         rm = (modrm & 7) | REX_B(s);
8111         reg = ((modrm >> 3) & 7) | REX_R(s);
8112         switch (reg) {
8113         case 0:
8114             if ((prefixes & PREFIX_LOCK) &&
8115                 (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
8116                 reg = 8;
8117             }
8118             break;
8119         case 2:
8120         case 3:
8121         case 4:
8122         case 8:
8123             break;
8124         default:
8125             goto unknown_op;
8126         }
8127         ot  = (CODE64(s) ? MO_64 : MO_32);
8128 
8129         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8130             gen_io_start();
8131         }
8132         if (b & 2) {
8133             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0 + reg);
8134             gen_op_mov_v_reg(s, ot, s->T0, rm);
8135             gen_helper_write_crN(cpu_env, tcg_constant_i32(reg), s->T0);
8136             gen_jmp_im(s, s->pc - s->cs_base);
8137             gen_eob(s);
8138         } else {
8139             gen_svm_check_intercept(s, SVM_EXIT_READ_CR0 + reg);
8140             gen_helper_read_crN(s->T0, cpu_env, tcg_constant_i32(reg));
8141             gen_op_mov_reg_v(s, ot, rm, s->T0);
8142             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8143                 gen_jmp(s, s->pc - s->cs_base);
8144             }
8145         }
8146         break;
8147 
8148     case 0x121: /* mov reg, drN */
8149     case 0x123: /* mov drN, reg */
8150         if (check_cpl0(s)) {
8151             modrm = x86_ldub_code(env, s);
8152             /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8153              * AMD documentation (24594.pdf) and testing of
8154              * intel 386 and 486 processors all show that the mod bits
8155              * are assumed to be 1's, regardless of actual values.
8156              */
8157             rm = (modrm & 7) | REX_B(s);
8158             reg = ((modrm >> 3) & 7) | REX_R(s);
8159             if (CODE64(s))
8160                 ot = MO_64;
8161             else
8162                 ot = MO_32;
8163             if (reg >= 8) {
8164                 goto illegal_op;
8165             }
8166             if (b & 2) {
8167                 gen_svm_check_intercept(s, SVM_EXIT_WRITE_DR0 + reg);
8168                 gen_op_mov_v_reg(s, ot, s->T0, rm);
8169                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8170                 gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
8171                 gen_jmp_im(s, s->pc - s->cs_base);
8172                 gen_eob(s);
8173             } else {
8174                 gen_svm_check_intercept(s, SVM_EXIT_READ_DR0 + reg);
8175                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8176                 gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
8177                 gen_op_mov_reg_v(s, ot, rm, s->T0);
8178             }
8179         }
8180         break;
8181     case 0x106: /* clts */
8182         if (check_cpl0(s)) {
8183             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
8184             gen_helper_clts(cpu_env);
8185             /* abort block because static cpu state changed */
8186             gen_jmp_im(s, s->pc - s->cs_base);
8187             gen_eob(s);
8188         }
8189         break;
8190     /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8191     case 0x1c3: /* MOVNTI reg, mem */
8192         if (!(s->cpuid_features & CPUID_SSE2))
8193             goto illegal_op;
8194         ot = mo_64_32(dflag);
8195         modrm = x86_ldub_code(env, s);
8196         mod = (modrm >> 6) & 3;
8197         if (mod == 3)
8198             goto illegal_op;
8199         reg = ((modrm >> 3) & 7) | REX_R(s);
8200         /* generate a generic store */
8201         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8202         break;
8203     case 0x1ae:
8204         modrm = x86_ldub_code(env, s);
8205         switch (modrm) {
8206         CASE_MODRM_MEM_OP(0): /* fxsave */
8207             if (!(s->cpuid_features & CPUID_FXSR)
8208                 || (prefixes & PREFIX_LOCK)) {
8209                 goto illegal_op;
8210             }
8211             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8212                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8213                 break;
8214             }
8215             gen_lea_modrm(env, s, modrm);
8216             gen_helper_fxsave(cpu_env, s->A0);
8217             break;
8218 
8219         CASE_MODRM_MEM_OP(1): /* fxrstor */
8220             if (!(s->cpuid_features & CPUID_FXSR)
8221                 || (prefixes & PREFIX_LOCK)) {
8222                 goto illegal_op;
8223             }
8224             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8225                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8226                 break;
8227             }
8228             gen_lea_modrm(env, s, modrm);
8229             gen_helper_fxrstor(cpu_env, s->A0);
8230             break;
8231 
8232         CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8233             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8234                 goto illegal_op;
8235             }
8236             if (s->flags & HF_TS_MASK) {
8237                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8238                 break;
8239             }
8240             gen_lea_modrm(env, s, modrm);
8241             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8242             gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8243             break;
8244 
8245         CASE_MODRM_MEM_OP(3): /* stmxcsr */
8246             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8247                 goto illegal_op;
8248             }
8249             if (s->flags & HF_TS_MASK) {
8250                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8251                 break;
8252             }
8253             gen_helper_update_mxcsr(cpu_env);
8254             gen_lea_modrm(env, s, modrm);
8255             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8256             gen_op_st_v(s, MO_32, s->T0, s->A0);
8257             break;
8258 
8259         CASE_MODRM_MEM_OP(4): /* xsave */
8260             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8261                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8262                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8263                 goto illegal_op;
8264             }
8265             gen_lea_modrm(env, s, modrm);
8266             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8267                                   cpu_regs[R_EDX]);
8268             gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8269             break;
8270 
8271         CASE_MODRM_MEM_OP(5): /* xrstor */
8272             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8273                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8274                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8275                 goto illegal_op;
8276             }
8277             gen_lea_modrm(env, s, modrm);
8278             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8279                                   cpu_regs[R_EDX]);
8280             gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8281             /* XRSTOR is how MPX is enabled, which changes how
8282                we translate.  Thus we need to end the TB.  */
8283             gen_update_cc_op(s);
8284             gen_jmp_im(s, s->pc - s->cs_base);
8285             gen_eob(s);
8286             break;
8287 
8288         CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8289             if (prefixes & PREFIX_LOCK) {
8290                 goto illegal_op;
8291             }
8292             if (prefixes & PREFIX_DATA) {
8293                 /* clwb */
8294                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8295                     goto illegal_op;
8296                 }
8297                 gen_nop_modrm(env, s, modrm);
8298             } else {
8299                 /* xsaveopt */
8300                 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8301                     || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8302                     || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8303                     goto illegal_op;
8304                 }
8305                 gen_lea_modrm(env, s, modrm);
8306                 tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8307                                       cpu_regs[R_EDX]);
8308                 gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8309             }
8310             break;
8311 
8312         CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8313             if (prefixes & PREFIX_LOCK) {
8314                 goto illegal_op;
8315             }
8316             if (prefixes & PREFIX_DATA) {
8317                 /* clflushopt */
8318                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8319                     goto illegal_op;
8320                 }
8321             } else {
8322                 /* clflush */
8323                 if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8324                     || !(s->cpuid_features & CPUID_CLFLUSH)) {
8325                     goto illegal_op;
8326                 }
8327             }
8328             gen_nop_modrm(env, s, modrm);
8329             break;
8330 
8331         case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8332         case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8333         case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8334         case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8335             if (CODE64(s)
8336                 && (prefixes & PREFIX_REPZ)
8337                 && !(prefixes & PREFIX_LOCK)
8338                 && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8339                 TCGv base, treg, src, dst;
8340 
8341                 /* Preserve hflags bits by testing CR4 at runtime.  */
8342                 tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8343                 gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
8344 
8345                 base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8346                 treg = cpu_regs[(modrm & 7) | REX_B(s)];
8347 
8348                 if (modrm & 0x10) {
8349                     /* wr*base */
8350                     dst = base, src = treg;
8351                 } else {
8352                     /* rd*base */
8353                     dst = treg, src = base;
8354                 }
8355 
8356                 if (s->dflag == MO_32) {
8357                     tcg_gen_ext32u_tl(dst, src);
8358                 } else {
8359                     tcg_gen_mov_tl(dst, src);
8360                 }
8361                 break;
8362             }
8363             goto unknown_op;
8364 
8365         case 0xf8: /* sfence / pcommit */
8366             if (prefixes & PREFIX_DATA) {
8367                 /* pcommit */
8368                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8369                     || (prefixes & PREFIX_LOCK)) {
8370                     goto illegal_op;
8371                 }
8372                 break;
8373             }
8374             /* fallthru */
8375         case 0xf9 ... 0xff: /* sfence */
8376             if (!(s->cpuid_features & CPUID_SSE)
8377                 || (prefixes & PREFIX_LOCK)) {
8378                 goto illegal_op;
8379             }
8380             tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8381             break;
8382         case 0xe8 ... 0xef: /* lfence */
8383             if (!(s->cpuid_features & CPUID_SSE)
8384                 || (prefixes & PREFIX_LOCK)) {
8385                 goto illegal_op;
8386             }
8387             tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8388             break;
8389         case 0xf0 ... 0xf7: /* mfence */
8390             if (!(s->cpuid_features & CPUID_SSE2)
8391                 || (prefixes & PREFIX_LOCK)) {
8392                 goto illegal_op;
8393             }
8394             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8395             break;
8396 
8397         default:
8398             goto unknown_op;
8399         }
8400         break;
8401 
8402     case 0x10d: /* 3DNow! prefetch(w) */
8403         modrm = x86_ldub_code(env, s);
8404         mod = (modrm >> 6) & 3;
8405         if (mod == 3)
8406             goto illegal_op;
8407         gen_nop_modrm(env, s, modrm);
8408         break;
8409     case 0x1aa: /* rsm */
8410         gen_svm_check_intercept(s, SVM_EXIT_RSM);
8411         if (!(s->flags & HF_SMM_MASK))
8412             goto illegal_op;
8413 #ifdef CONFIG_USER_ONLY
8414         /* we should not be in SMM mode */
8415         g_assert_not_reached();
8416 #else
8417         gen_update_cc_op(s);
8418         gen_jmp_im(s, s->pc - s->cs_base);
8419         gen_helper_rsm(cpu_env);
8420 #endif /* CONFIG_USER_ONLY */
8421         gen_eob(s);
8422         break;
8423     case 0x1b8: /* SSE4.2 popcnt */
8424         if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8425              PREFIX_REPZ)
8426             goto illegal_op;
8427         if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8428             goto illegal_op;
8429 
8430         modrm = x86_ldub_code(env, s);
8431         reg = ((modrm >> 3) & 7) | REX_R(s);
8432 
8433         if (s->prefix & PREFIX_DATA) {
8434             ot = MO_16;
8435         } else {
8436             ot = mo_64_32(dflag);
8437         }
8438 
8439         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8440         gen_extu(ot, s->T0);
8441         tcg_gen_mov_tl(cpu_cc_src, s->T0);
8442         tcg_gen_ctpop_tl(s->T0, s->T0);
8443         gen_op_mov_reg_v(s, ot, reg, s->T0);
8444 
8445         set_cc_op(s, CC_OP_POPCNT);
8446         break;
8447     case 0x10e ... 0x10f:
8448         /* 3DNow! instructions, ignore prefixes */
8449         s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8450         /* fall through */
8451     case 0x110 ... 0x117:
8452     case 0x128 ... 0x12f:
8453     case 0x138 ... 0x13a:
8454     case 0x150 ... 0x179:
8455     case 0x17c ... 0x17f:
8456     case 0x1c2:
8457     case 0x1c4 ... 0x1c6:
8458     case 0x1d0 ... 0x1fe:
8459         gen_sse(env, s, b, pc_start);
8460         break;
8461     default:
8462         goto unknown_op;
8463     }
8464     return s->pc;
8465  illegal_op:
8466     gen_illegal_opcode(s);
8467     return s->pc;
8468  unknown_op:
8469     gen_unknown_opcode(env, s);
8470     return s->pc;
8471 }
8472 
8473 void tcg_x86_init(void)
8474 {
8475     static const char reg_names[CPU_NB_REGS][4] = {
8476 #ifdef TARGET_X86_64
8477         [R_EAX] = "rax",
8478         [R_EBX] = "rbx",
8479         [R_ECX] = "rcx",
8480         [R_EDX] = "rdx",
8481         [R_ESI] = "rsi",
8482         [R_EDI] = "rdi",
8483         [R_EBP] = "rbp",
8484         [R_ESP] = "rsp",
8485         [8]  = "r8",
8486         [9]  = "r9",
8487         [10] = "r10",
8488         [11] = "r11",
8489         [12] = "r12",
8490         [13] = "r13",
8491         [14] = "r14",
8492         [15] = "r15",
8493 #else
8494         [R_EAX] = "eax",
8495         [R_EBX] = "ebx",
8496         [R_ECX] = "ecx",
8497         [R_EDX] = "edx",
8498         [R_ESI] = "esi",
8499         [R_EDI] = "edi",
8500         [R_EBP] = "ebp",
8501         [R_ESP] = "esp",
8502 #endif
8503     };
8504     static const char seg_base_names[6][8] = {
8505         [R_CS] = "cs_base",
8506         [R_DS] = "ds_base",
8507         [R_ES] = "es_base",
8508         [R_FS] = "fs_base",
8509         [R_GS] = "gs_base",
8510         [R_SS] = "ss_base",
8511     };
8512     static const char bnd_regl_names[4][8] = {
8513         "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8514     };
8515     static const char bnd_regu_names[4][8] = {
8516         "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8517     };
8518     int i;
8519 
8520     cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8521                                        offsetof(CPUX86State, cc_op), "cc_op");
8522     cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8523                                     "cc_dst");
8524     cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8525                                     "cc_src");
8526     cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8527                                      "cc_src2");
8528 
8529     for (i = 0; i < CPU_NB_REGS; ++i) {
8530         cpu_regs[i] = tcg_global_mem_new(cpu_env,
8531                                          offsetof(CPUX86State, regs[i]),
8532                                          reg_names[i]);
8533     }
8534 
8535     for (i = 0; i < 6; ++i) {
8536         cpu_seg_base[i]
8537             = tcg_global_mem_new(cpu_env,
8538                                  offsetof(CPUX86State, segs[i].base),
8539                                  seg_base_names[i]);
8540     }
8541 
8542     for (i = 0; i < 4; ++i) {
8543         cpu_bndl[i]
8544             = tcg_global_mem_new_i64(cpu_env,
8545                                      offsetof(CPUX86State, bnd_regs[i].lb),
8546                                      bnd_regl_names[i]);
8547         cpu_bndu[i]
8548             = tcg_global_mem_new_i64(cpu_env,
8549                                      offsetof(CPUX86State, bnd_regs[i].ub),
8550                                      bnd_regu_names[i]);
8551     }
8552 }
8553 
8554 static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
8555 {
8556     DisasContext *dc = container_of(dcbase, DisasContext, base);
8557     CPUX86State *env = cpu->env_ptr;
8558     uint32_t flags = dc->base.tb->flags;
8559     int cpl = (flags >> HF_CPL_SHIFT) & 3;
8560     int iopl = (flags >> IOPL_SHIFT) & 3;
8561 
8562     dc->cs_base = dc->base.tb->cs_base;
8563     dc->flags = flags;
8564 #ifndef CONFIG_USER_ONLY
8565     dc->cpl = cpl;
8566     dc->iopl = iopl;
8567 #endif
8568 
8569     /* We make some simplifying assumptions; validate they're correct. */
8570     g_assert(PE(dc) == ((flags & HF_PE_MASK) != 0));
8571     g_assert(CPL(dc) == cpl);
8572     g_assert(IOPL(dc) == iopl);
8573     g_assert(VM86(dc) == ((flags & HF_VM_MASK) != 0));
8574     g_assert(CODE32(dc) == ((flags & HF_CS32_MASK) != 0));
8575     g_assert(CODE64(dc) == ((flags & HF_CS64_MASK) != 0));
8576     g_assert(SS32(dc) == ((flags & HF_SS32_MASK) != 0));
8577     g_assert(LMA(dc) == ((flags & HF_LMA_MASK) != 0));
8578     g_assert(ADDSEG(dc) == ((flags & HF_ADDSEG_MASK) != 0));
8579     g_assert(SVME(dc) == ((flags & HF_SVME_MASK) != 0));
8580     g_assert(GUEST(dc) == ((flags & HF_GUEST_MASK) != 0));
8581 
8582     dc->cc_op = CC_OP_DYNAMIC;
8583     dc->cc_op_dirty = false;
8584     dc->popl_esp_hack = 0;
8585     /* select memory access functions */
8586     dc->mem_index = 0;
8587 #ifdef CONFIG_SOFTMMU
8588     dc->mem_index = cpu_mmu_index(env, false);
8589 #endif
8590     dc->cpuid_features = env->features[FEAT_1_EDX];
8591     dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8592     dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8593     dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8594     dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8595     dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8596     dc->jmp_opt = !(dc->base.singlestep_enabled ||
8597                     (flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)));
8598     /*
8599      * If jmp_opt, we want to handle each string instruction individually.
8600      * For icount also disable repz optimization so that each iteration
8601      * is accounted separately.
8602      */
8603     dc->repz_opt = !dc->jmp_opt && !(tb_cflags(dc->base.tb) & CF_USE_ICOUNT);
8604 
8605     dc->T0 = tcg_temp_new();
8606     dc->T1 = tcg_temp_new();
8607     dc->A0 = tcg_temp_new();
8608 
8609     dc->tmp0 = tcg_temp_new();
8610     dc->tmp1_i64 = tcg_temp_new_i64();
8611     dc->tmp2_i32 = tcg_temp_new_i32();
8612     dc->tmp3_i32 = tcg_temp_new_i32();
8613     dc->tmp4 = tcg_temp_new();
8614     dc->ptr0 = tcg_temp_new_ptr();
8615     dc->ptr1 = tcg_temp_new_ptr();
8616     dc->cc_srcT = tcg_temp_local_new();
8617 }
8618 
8619 static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
8620 {
8621 }
8622 
8623 static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
8624 {
8625     DisasContext *dc = container_of(dcbase, DisasContext, base);
8626 
8627     tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
8628 }
8629 
8630 static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8631 {
8632     DisasContext *dc = container_of(dcbase, DisasContext, base);
8633     target_ulong pc_next;
8634 
8635 #ifdef TARGET_VSYSCALL_PAGE
8636     /*
8637      * Detect entry into the vsyscall page and invoke the syscall.
8638      */
8639     if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) {
8640         gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next);
8641         dc->base.pc_next = dc->pc + 1;
8642         return;
8643     }
8644 #endif
8645 
8646     pc_next = disas_insn(dc, cpu);
8647 
8648     if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
8649         /* if single step mode, we generate only one instruction and
8650            generate an exception */
8651         /* if irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear
8652            the flag and abort the translation to give the irqs a
8653            chance to happen */
8654         dc->base.is_jmp = DISAS_TOO_MANY;
8655     } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
8656                && ((pc_next & TARGET_PAGE_MASK)
8657                    != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
8658                        & TARGET_PAGE_MASK)
8659                    || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
8660         /* Do not cross the boundary of the pages in icount mode,
8661            it can cause an exception. Do it only when boundary is
8662            crossed by the first instruction in the block.
8663            If current instruction already crossed the bound - it's ok,
8664            because an exception hasn't stopped this code.
8665          */
8666         dc->base.is_jmp = DISAS_TOO_MANY;
8667     } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
8668         dc->base.is_jmp = DISAS_TOO_MANY;
8669     }
8670 
8671     dc->base.pc_next = pc_next;
8672 }
8673 
8674 static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8675 {
8676     DisasContext *dc = container_of(dcbase, DisasContext, base);
8677 
8678     if (dc->base.is_jmp == DISAS_TOO_MANY) {
8679         gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
8680         gen_eob(dc);
8681     }
8682 }
8683 
8684 static void i386_tr_disas_log(const DisasContextBase *dcbase,
8685                               CPUState *cpu)
8686 {
8687     DisasContext *dc = container_of(dcbase, DisasContext, base);
8688 
8689     qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
8690     log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
8691 }
8692 
8693 static const TranslatorOps i386_tr_ops = {
8694     .init_disas_context = i386_tr_init_disas_context,
8695     .tb_start           = i386_tr_tb_start,
8696     .insn_start         = i386_tr_insn_start,
8697     .translate_insn     = i386_tr_translate_insn,
8698     .tb_stop            = i386_tr_tb_stop,
8699     .disas_log          = i386_tr_disas_log,
8700 };
8701 
8702 /* generate intermediate code for basic block 'tb'.  */
8703 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
8704 {
8705     DisasContext dc;
8706 
8707     translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
8708 }
8709 
8710 void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8711                           target_ulong *data)
8712 {
8713     int cc_op = data[1];
8714     env->eip = data[0] - tb->cs_base;
8715     if (cc_op != CC_OP_DYNAMIC) {
8716         env->cc_op = cc_op;
8717     }
8718 }
8719