xref: /qemu/target/i386/tcg/translate.c (revision 785ea711)
1 /*
2  *  i386 translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 
21 #include "qemu/host-utils.h"
22 #include "cpu.h"
23 #include "disas/disas.h"
24 #include "exec/exec-all.h"
25 #include "tcg/tcg-op.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/translator.h"
28 
29 #include "exec/helper-proto.h"
30 #include "exec/helper-gen.h"
31 #include "helper-tcg.h"
32 
33 #include "exec/log.h"
34 
/*
 * Instruction-prefix flags.  Each flag owns one bit so that several can be
 * combined with '|' and tested with '&' against DisasContext.prefix.
 */
#define PREFIX_REPZ   (1 << 0)
#define PREFIX_REPNZ  (1 << 1)
#define PREFIX_LOCK   (1 << 2)
#define PREFIX_DATA   (1 << 3)
#define PREFIX_ADR    (1 << 4)
#define PREFIX_VEX    (1 << 5)
#define PREFIX_REX    (1 << 6)
42 
/*
 * Count-trailing-zeros / count-leading-zeros aliases:
 * the 64-bit variants when TARGET_X86_64 is defined, the 32-bit ones otherwise.
 */
#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif
50 
/*
 * Case-label helpers for a switch indexed by a ModRM byte, laid out as
 * (mod << 6) | (OP << 3) | rm.  Each macro expands to a series of GNU
 * case ranges and must be followed by ':' at the point of use.
 *
 * OP is parenthesized in the expansion so that an expression argument
 * (e.g. "base + 1") keeps its intended value.
 */

/* Match all memory operands (mod = 0, 1 or 2; any rm) for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | ((OP) << 3) | 0 ... (0 << 6) | ((OP) << 3) | 7: \
    case (1 << 6) | ((OP) << 3) | 0 ... (1 << 6) | ((OP) << 3) | 7: \
    case (2 << 6) | ((OP) << 3) | 0 ... (2 << 6) | ((OP) << 3) | 7

/* Match every ModRM byte (mod = 0..3; any rm) for a given OP.  */
#define CASE_MODRM_OP(OP) \
    case (0 << 6) | ((OP) << 3) | 0 ... (0 << 6) | ((OP) << 3) | 7: \
    case (1 << 6) | ((OP) << 3) | 0 ... (1 << 6) | ((OP) << 3) | 7: \
    case (2 << 6) | ((OP) << 3) | 0 ... (2 << 6) | ((OP) << 3) | 7: \
    case (3 << 6) | ((OP) << 3) | 0 ... (3 << 6) | ((OP) << 3) | 7
62 
63 //#define MACRO_TEST   1
64 
/* global register indexes */
/* Condition-code inputs consumed by the cc_compute helpers and the
   gen_prepare_eflags_* routines below. */
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
/* Stored copy of the current CC operation; written by gen_update_cc_op(). */
static TCGv_i32 cpu_cc_op;
/* General-purpose registers, indexed by register number (R_ECX, R_ESI, ...). */
static TCGv cpu_regs[CPU_NB_REGS];
/* Segment bases, indexed by segment number (see gen_lea_v_seg()). */
static TCGv cpu_seg_base[6];
/* NOTE(review): presumably the MPX bound registers' lower/upper halves —
   not used in the visible part of the file; confirm. */
static TCGv_i64 cpu_bndl[4];
static TCGv_i64 cpu_bndu[4];
72 
73 #include "exec/gen-icount.h"
74 
/*
 * Translator state that is threaded through the gen_* helpers in this file
 * as 's'.
 */
typedef struct DisasContext {
    DisasContextBase base; /* base.pc_next is read by gen_check_io() */

    target_ulong pc;       /* pc = eip + cs_base */
    target_ulong pc_start; /* pc at TB entry */
    target_ulong cs_base;  /* base of CS segment */

    MemOp aflag; /* address size: passed to gen_lea_v_seg() and used for
                    the ESI/EDI updates of string ops */
    MemOp dflag; /* NOTE(review): presumably the operand (data) size —
                    not used in the visible part of the file; confirm */

    int8_t override; /* -1 if no override, else R_CS, R_DS, etc */
    uint8_t prefix;  /* OR of PREFIX_* flags */

#ifndef CONFIG_USER_ONLY
    uint8_t cpl;   /* code priv level */
    uint8_t iopl;  /* i/o priv level */
#endif
    uint8_t vex_l;  /* vex vector length */
    uint8_t vex_v;  /* vex vvvv register, without 1's complement.  */
    uint8_t popl_esp_hack; /* for correct popl with esp base handling */
    uint8_t rip_offset; /* only used in x86_64, but left for simplicity */

#ifdef TARGET_X86_64
    /* Read through the REX_R/REX_X/REX_B/REX_W accessor macros. */
    uint8_t rex_r;
    uint8_t rex_x;
    uint8_t rex_b;
    bool rex_w;
#endif
    bool jmp_opt; /* use direct block chaining for direct jumps */
    bool repz_opt; /* optimize jumps within repz instructions */
    bool cc_op_dirty; /* cc_op has not yet been stored to cpu_cc_op;
                         set by set_cc_op(), cleared by gen_update_cc_op() */

    CCOp cc_op;  /* current CC operation */
    int mem_index; /* select memory access functions */
    uint32_t flags; /* all execution flags */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;

    /* TCG local temps */
    TCGv cc_srcT; /* fourth CC input (USES_CC_SRCT), used by the SUB ops */
    TCGv A0;      /* effective address, written by gen_lea_v_seg() */
    TCGv T0;
    TCGv T1;

    /* TCG local register indexes (only used inside old micro ops) */
    TCGv tmp0;
    TCGv tmp4;
    TCGv_ptr ptr0;
    TCGv_ptr ptr1;
    TCGv_i32 tmp2_i32;
    TCGv_i32 tmp3_i32;
    TCGv_i64 tmp1_i64;

    sigjmp_buf jmpbuf;
} DisasContext;
134 
/*
 * CPU-mode predicates, each taking the DisasContext pointer S.
 *
 * The environment in which user-only runs is constrained, so where a
 * mode is fixed for a given build configuration the predicate collapses
 * to a compile-time constant; otherwise it tests a bit of S->flags or
 * reads a field of S.
 */
#ifdef CONFIG_USER_ONLY
#define PE(S)     true
#define CPL(S)    3
#define IOPL(S)   0
#define SVME(S)   false
#define GUEST(S)  false
#else
#define PE(S)     (((S)->flags & HF_PE_MASK) != 0)
#define CPL(S)    ((S)->cpl)
#define IOPL(S)   ((S)->iopl)
#define SVME(S)   (((S)->flags & HF_SVME_MASK) != 0)
#define GUEST(S)  (((S)->flags & HF_GUEST_MASK) != 0)
#endif
/* Fixed only when both user-only and a 64-bit target are configured. */
#if defined(CONFIG_USER_ONLY) && defined(TARGET_X86_64)
#define VM86(S)   false
#define CODE32(S) true
#define SS32(S)   true
#define ADDSEG(S) false
#else
#define VM86(S)   (((S)->flags & HF_VM_MASK) != 0)
#define CODE32(S) (((S)->flags & HF_CS32_MASK) != 0)
#define SS32(S)   (((S)->flags & HF_SS32_MASK) != 0)
#define ADDSEG(S) (((S)->flags & HF_ADDSEG_MASK) != 0)
#endif
/* Always false on a 32-bit target, always true for 64-bit user-only. */
#if !defined(TARGET_X86_64)
#define CODE64(S) false
#define LMA(S)    false
#elif defined(CONFIG_USER_ONLY)
#define CODE64(S) true
#define LMA(S)    true
#else
#define CODE64(S) (((S)->flags & HF_CS64_MASK) != 0)
#define LMA(S)    (((S)->flags & HF_LMA_MASK) != 0)
#endif
170 
/*
 * Accessors for REX prefix state.  On a 64-bit target they read the
 * DisasContext; on a 32-bit target, where the rex_* fields do not exist,
 * they are the constants false / 0.
 * NOTE(review): the "+ 0" presumably turns the field into a non-lvalue so
 * the macro cannot be assigned through — confirm intent.
 */
#ifdef TARGET_X86_64
#define REX_PREFIX(S)  (((S)->prefix & PREFIX_REX) != 0)
#define REX_W(S)       ((S)->rex_w)
#define REX_R(S)       ((S)->rex_r + 0)
#define REX_X(S)       ((S)->rex_x + 0)
#define REX_B(S)       ((S)->rex_b + 0)
#else
#define REX_PREFIX(S)  false
#define REX_W(S)       false
#define REX_R(S)       0
#define REX_X(S)       0
#define REX_B(S)       0
#endif
184 
/*
 * Many sysemu-only helpers are not reachable for user-only.
 * Define stub generators here, so that we need not either sprinkle
 * ifdefs through the translator, nor provide the helper function.
 *
 * STUB_HELPER(NAME, args...) defines a static inline gen_helper_NAME()
 * with the given parameter list whose whole body is
 * qemu_build_not_reached().
 * NOTE(review): presumably that makes the build fail if any call to a stub
 * survives dead-code elimination — confirm against the macro's definition.
 */
#define STUB_HELPER(NAME, ...) \
    static inline void gen_helper_##NAME(__VA_ARGS__) \
    { qemu_build_not_reached(); }

/* Stubs are only instantiated for user-only builds. */
#ifdef CONFIG_USER_ONLY
STUB_HELPER(clgi, TCGv_env env)
STUB_HELPER(flush_page, TCGv_env env, TCGv addr)
STUB_HELPER(hlt, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(inb, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inw, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inl, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(monitor, TCGv_env env, TCGv addr)
STUB_HELPER(mwait, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(outb, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outw, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outl, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(rdmsr, TCGv_env env)
STUB_HELPER(read_crN, TCGv ret, TCGv_env env, TCGv_i32 reg)
STUB_HELPER(get_dr, TCGv ret, TCGv_env env, TCGv_i32 reg)
STUB_HELPER(set_dr, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(stgi, TCGv_env env)
STUB_HELPER(svm_check_intercept, TCGv_env env, TCGv_i32 type)
STUB_HELPER(vmload, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(vmmcall, TCGv_env env)
STUB_HELPER(vmrun, TCGv_env env, TCGv_i32 aflag, TCGv_i32 pc_ofs)
STUB_HELPER(vmsave, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(write_crN, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(wrmsr, TCGv_env env)
#endif
219 
/*
 * Forward declarations of file-local generators that are referenced
 * before their definitions (e.g. gen_exception_gpf() in gen_check_io()).
 */
static void gen_eob(DisasContext *s);
static void gen_jr(DisasContext *s, TCGv dest);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, MemOp ot, int d);
static void gen_exception_gpf(DisasContext *s);
226 
/* i386 arith/logic operations, numbered 0..7 in declaration order. */
enum {
    OP_ADDL = 0,
    OP_ORL  = 1,
    OP_ADCL = 2,
    OP_SBBL = 3,
    OP_ANDL = 4,
    OP_SUBL = 5,
    OP_XORL = 6,
    OP_CMPL = 7,
};
238 
/* i386 shift ops, numbered 0..7. */
enum {
    OP_ROL  = 0,
    OP_ROR  = 1,
    OP_RCL  = 2,
    OP_RCR  = 3,
    OP_SHL  = 4,
    OP_SHR  = 5,
    OP_SHL1 = 6, /* undocumented */
    OP_SAR  = 7,
};
250 
/*
 * Condition selectors as extracted by gen_prepare_cc() from a jump opcode
 * value b via (b >> 1) & 7; bit 0 of b requests the inverted condition.
 */
enum {
    JCC_O  = 0,
    JCC_B  = 1,
    JCC_Z  = 2,
    JCC_BE = 3,
    JCC_S  = 4,
    JCC_P  = 5,
    JCC_L  = 6,
    JCC_LE = 7,
};
261 
/* Operand selectors: the eight integer registers plus pseudo-operands. */
enum {
    /* I386 int registers */
    OR_EAX = 0,   /* MUST be even numbered */
    OR_ECX = 1,
    OR_EDX = 2,
    OR_EBX = 3,
    OR_ESP = 4,
    OR_EBP = 5,
    OR_ESI = 6,
    OR_EDI = 7,

    OR_TMP0 = 16, /* temporary operand register */
    OR_TMP1 = 17,
    OR_A0   = 18, /* temporary register used when doing address evaluation */
};
277 
/* Bit flags naming the CC inputs; combined in the cc_op_live[] table. */
enum {
    USES_CC_DST  = 1 << 0,
    USES_CC_SRC  = 1 << 1,
    USES_CC_SRC2 = 1 << 2,
    USES_CC_SRCT = 1 << 3,
};
284 
/*
 * Bit set if the global variable is live after setting CC_OP to X.
 *
 * Indexed by CCOp; each entry is an OR of USES_CC_* flags.  set_cc_op()
 * uses it to discard inputs that the new operation no longer needs, and
 * gen_compute_eflags() uses it to avoid reading inputs that are not live.
 */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0, /* no inputs needed */
    [CC_OP_POPCNT] = USES_CC_SRC,
};
306 
307 static void set_cc_op(DisasContext *s, CCOp op)
308 {
309     int dead;
310 
311     if (s->cc_op == op) {
312         return;
313     }
314 
315     /* Discard CC computation that will no longer be used.  */
316     dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
317     if (dead & USES_CC_DST) {
318         tcg_gen_discard_tl(cpu_cc_dst);
319     }
320     if (dead & USES_CC_SRC) {
321         tcg_gen_discard_tl(cpu_cc_src);
322     }
323     if (dead & USES_CC_SRC2) {
324         tcg_gen_discard_tl(cpu_cc_src2);
325     }
326     if (dead & USES_CC_SRCT) {
327         tcg_gen_discard_tl(s->cc_srcT);
328     }
329 
330     if (op == CC_OP_DYNAMIC) {
331         /* The DYNAMIC setting is translator only, and should never be
332            stored.  Thus we always consider it clean.  */
333         s->cc_op_dirty = false;
334     } else {
335         /* Discard any computed CC_OP value (see shifts).  */
336         if (s->cc_op == CC_OP_DYNAMIC) {
337             tcg_gen_discard_i32(cpu_cc_op);
338         }
339         s->cc_op_dirty = true;
340     }
341     s->cc_op = op;
342 }
343 
344 static void gen_update_cc_op(DisasContext *s)
345 {
346     if (s->cc_op_dirty) {
347         tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
348         s->cc_op_dirty = false;
349     }
350 }
351 
/* NB_OP_SIZES is 4 when TARGET_X86_64 is defined and 3 otherwise.
   NOTE(review): presumably the number of operand sizes (8/16/32[/64]) —
   not used in the visible part of the file; confirm. */
#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */
361 
/*
 * Byte offsets inside a target_ulong-sized slot, chosen according to host
 * endianness.
 * NOTE(review): presumably B = low byte, H = bits 15..8, W = low 16 bits,
 * L = low 32 bits, LH = high 32 bits of a register — the little-endian
 * values (0, 1, 0, 0, 4) are consistent with that; confirm against users.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif
375 
376 /* In instruction encodings for byte register accesses the
377  * register number usually indicates "low 8 bits of register N";
378  * however there are some special cases where N 4..7 indicates
379  * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
380  * true for this special case, false otherwise.
381  */
382 static inline bool byte_reg_is_xH(DisasContext *s, int reg)
383 {
384     /* Any time the REX prefix is present, byte registers are uniform */
385     if (reg < 4 || REX_PREFIX(s)) {
386         return false;
387     }
388     return true;
389 }
390 
391 /* Select the size of a push/pop operation.  */
392 static inline MemOp mo_pushpop(DisasContext *s, MemOp ot)
393 {
394     if (CODE64(s)) {
395         return ot == MO_16 ? MO_16 : MO_64;
396     } else {
397         return ot;
398     }
399 }
400 
401 /* Select the size of the stack pointer.  */
402 static inline MemOp mo_stacksize(DisasContext *s)
403 {
404     return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
405 }
406 
407 /* Select only size 64 else 32.  Used for SSE operand sizes.  */
408 static inline MemOp mo_64_32(MemOp ot)
409 {
410 #ifdef TARGET_X86_64
411     return ot == MO_64 ? MO_64 : MO_32;
412 #else
413     return MO_32;
414 #endif
415 }
416 
417 /* Select size 8 if lsb of B is clear, else OT.  Used for decoding
418    byte vs word opcodes.  */
419 static inline MemOp mo_b_d(int b, MemOp ot)
420 {
421     return b & 1 ? ot : MO_8;
422 }
423 
424 /* Select size 8 if lsb of B is clear, else OT capped at 32.
425    Used for decoding operand size of port opcodes.  */
426 static inline MemOp mo_b_d32(int b, MemOp ot)
427 {
428     return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
429 }
430 
431 static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0)
432 {
433     switch(ot) {
434     case MO_8:
435         if (!byte_reg_is_xH(s, reg)) {
436             tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
437         } else {
438             tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
439         }
440         break;
441     case MO_16:
442         tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
443         break;
444     case MO_32:
445         /* For x86_64, this sets the higher half of register to zero.
446            For i386, this is equivalent to a mov. */
447         tcg_gen_ext32u_tl(cpu_regs[reg], t0);
448         break;
449 #ifdef TARGET_X86_64
450     case MO_64:
451         tcg_gen_mov_tl(cpu_regs[reg], t0);
452         break;
453 #endif
454     default:
455         tcg_abort();
456     }
457 }
458 
459 static inline
460 void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg)
461 {
462     if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
463         tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
464     } else {
465         tcg_gen_mov_tl(t0, cpu_regs[reg]);
466     }
467 }
468 
469 static void gen_add_A0_im(DisasContext *s, int val)
470 {
471     tcg_gen_addi_tl(s->A0, s->A0, val);
472     if (!CODE64(s)) {
473         tcg_gen_ext32u_tl(s->A0, s->A0);
474     }
475 }
476 
477 static inline void gen_op_jmp_v(TCGv dest)
478 {
479     tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
480 }
481 
482 static inline
483 void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val)
484 {
485     tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
486     gen_op_mov_reg_v(s, size, reg, s->tmp0);
487 }
488 
489 static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg)
490 {
491     tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
492     gen_op_mov_reg_v(s, size, reg, s->tmp0);
493 }
494 
495 static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
496 {
497     tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
498 }
499 
500 static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
501 {
502     tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
503 }
504 
505 static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
506 {
507     if (d == OR_TMP0) {
508         gen_op_st_v(s, idx, s->T0, s->A0);
509     } else {
510         gen_op_mov_reg_v(s, idx, d, s->T0);
511     }
512 }
513 
514 static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
515 {
516     tcg_gen_movi_tl(s->tmp0, pc);
517     gen_op_jmp_v(s->tmp0);
518 }
519 
/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.

   s        translator state; the result is always written to s->A0
   aflag    address size: MO_16, MO_32, or (64-bit target only) MO_64;
            any other value aborts
   a0       the offset (REG) value
   def_seg  segment applied when there is no override and ADDSEG(s) is set
            (16- and 32-bit address sizes only)
   ovr_seg  explicit segment, or a negative value for none  */
static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        /* 64 bit address: without an override no segment base is added,
           so the offset is used as is. */
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(s->A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && ADDSEG(s)) {
            ovr_seg = def_seg;
        }
        /* Still no segment: the address is just the zero-extended offset. */
        if (ovr_seg < 0) {
            tcg_gen_ext32u_tl(s->A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        /* Truncate the offset first; from here on a0 aliases s->A0. */
        tcg_gen_ext16u_tl(s->A0, a0);
        a0 = s->A0;
        if (ovr_seg < 0) {
            if (ADDSEG(s)) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    /* Every path that reaches this point has selected a segment. */
    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(s->A0, a0, seg);
        } else if (CODE64(s)) {
            /* 16/32-bit address size in 64-bit code: truncate the offset
               to 32 bits, then add the base without truncating the sum. */
            tcg_gen_ext32u_tl(s->A0, a0);
            tcg_gen_add_tl(s->A0, s->A0, seg);
        } else {
            /* Outside 64-bit code the sum itself is truncated to 32 bits. */
            tcg_gen_add_tl(s->A0, a0, seg);
            tcg_gen_ext32u_tl(s->A0, s->A0);
        }
    }
}
575 
576 static inline void gen_string_movl_A0_ESI(DisasContext *s)
577 {
578     gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
579 }
580 
581 static inline void gen_string_movl_A0_EDI(DisasContext *s)
582 {
583     gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
584 }
585 
586 static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)
587 {
588     tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
589     tcg_gen_shli_tl(s->T0, s->T0, ot);
590 };
591 
592 static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
593 {
594     switch (size) {
595     case MO_8:
596         if (sign) {
597             tcg_gen_ext8s_tl(dst, src);
598         } else {
599             tcg_gen_ext8u_tl(dst, src);
600         }
601         return dst;
602     case MO_16:
603         if (sign) {
604             tcg_gen_ext16s_tl(dst, src);
605         } else {
606             tcg_gen_ext16u_tl(dst, src);
607         }
608         return dst;
609 #ifdef TARGET_X86_64
610     case MO_32:
611         if (sign) {
612             tcg_gen_ext32s_tl(dst, src);
613         } else {
614             tcg_gen_ext32u_tl(dst, src);
615         }
616         return dst;
617 #endif
618     default:
619         return src;
620     }
621 }
622 
623 static void gen_extu(MemOp ot, TCGv reg)
624 {
625     gen_ext_tl(reg, reg, ot, false);
626 }
627 
628 static void gen_exts(MemOp ot, TCGv reg)
629 {
630     gen_ext_tl(reg, reg, ot, true);
631 }
632 
633 static inline
634 void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
635 {
636     tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
637     gen_extu(size, s->tmp0);
638     tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
639 }
640 
641 static inline
642 void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
643 {
644     tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
645     gen_extu(size, s->tmp0);
646     tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
647 }
648 
649 static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n)
650 {
651     switch (ot) {
652     case MO_8:
653         gen_helper_inb(v, cpu_env, n);
654         break;
655     case MO_16:
656         gen_helper_inw(v, cpu_env, n);
657         break;
658     case MO_32:
659         gen_helper_inl(v, cpu_env, n);
660         break;
661     default:
662         tcg_abort();
663     }
664 }
665 
666 static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n)
667 {
668     switch (ot) {
669     case MO_8:
670         gen_helper_outb(cpu_env, v, n);
671         break;
672     case MO_16:
673         gen_helper_outw(cpu_env, v, n);
674         break;
675     case MO_32:
676         gen_helper_outl(cpu_env, v, n);
677         break;
678     default:
679         tcg_abort();
680     }
681 }
682 
683 /*
684  * Validate that access to [port, port + 1<<ot) is allowed.
685  * Raise #GP, or VMM exit if not.
686  */
687 static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port,
688                          uint32_t svm_flags)
689 {
690 #ifdef CONFIG_USER_ONLY
691     /*
692      * We do not implement the ioperm(2) syscall, so the TSS check
693      * will always fail.
694      */
695     gen_exception_gpf(s);
696     return false;
697 #else
698     if (PE(s) && (CPL(s) > IOPL(s) || VM86(s))) {
699         gen_helper_check_io(cpu_env, port, tcg_constant_i32(1 << ot));
700     }
701     if (GUEST(s)) {
702         target_ulong cur_eip = s->base.pc_next - s->cs_base;
703         target_ulong next_eip = s->pc - s->cs_base;
704 
705         gen_update_cc_op(s);
706         gen_jmp_im(s, cur_eip);
707         if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
708             svm_flags |= SVM_IOIO_REP_MASK;
709         }
710         svm_flags |= 1 << (SVM_IOIO_SIZE_SHIFT + ot);
711         gen_helper_svm_check_io(cpu_env, port,
712                                 tcg_constant_i32(svm_flags),
713                                 tcg_constant_i32(next_eip - cur_eip));
714     }
715     return true;
716 #endif
717 }
718 
719 static inline void gen_movs(DisasContext *s, MemOp ot)
720 {
721     gen_string_movl_A0_ESI(s);
722     gen_op_ld_v(s, ot, s->T0, s->A0);
723     gen_string_movl_A0_EDI(s);
724     gen_op_st_v(s, ot, s->T0, s->A0);
725     gen_op_movl_T0_Dshift(s, ot);
726     gen_op_add_reg_T0(s, s->aflag, R_ESI);
727     gen_op_add_reg_T0(s, s->aflag, R_EDI);
728 }
729 
730 static void gen_op_update1_cc(DisasContext *s)
731 {
732     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
733 }
734 
735 static void gen_op_update2_cc(DisasContext *s)
736 {
737     tcg_gen_mov_tl(cpu_cc_src, s->T1);
738     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
739 }
740 
741 static void gen_op_update3_cc(DisasContext *s, TCGv reg)
742 {
743     tcg_gen_mov_tl(cpu_cc_src2, reg);
744     tcg_gen_mov_tl(cpu_cc_src, s->T1);
745     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
746 }
747 
748 static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
749 {
750     tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
751 }
752 
753 static void gen_op_update_neg_cc(DisasContext *s)
754 {
755     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
756     tcg_gen_neg_tl(cpu_cc_src, s->T0);
757     tcg_gen_movi_tl(s->cc_srcT, 0);
758 }
759 
760 /* compute all eflags to cc_src */
761 static void gen_compute_eflags(DisasContext *s)
762 {
763     TCGv zero, dst, src1, src2;
764     int live, dead;
765 
766     if (s->cc_op == CC_OP_EFLAGS) {
767         return;
768     }
769     if (s->cc_op == CC_OP_CLR) {
770         tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
771         set_cc_op(s, CC_OP_EFLAGS);
772         return;
773     }
774 
775     zero = NULL;
776     dst = cpu_cc_dst;
777     src1 = cpu_cc_src;
778     src2 = cpu_cc_src2;
779 
780     /* Take care to not read values that are not live.  */
781     live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
782     dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
783     if (dead) {
784         zero = tcg_const_tl(0);
785         if (dead & USES_CC_DST) {
786             dst = zero;
787         }
788         if (dead & USES_CC_SRC) {
789             src1 = zero;
790         }
791         if (dead & USES_CC_SRC2) {
792             src2 = zero;
793         }
794     }
795 
796     gen_update_cc_op(s);
797     gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
798     set_cc_op(s, CC_OP_EFLAGS);
799 
800     if (dead) {
801         tcg_temp_free(zero);
802     }
803 }
804 
/*
 * Description of a condition test, as returned by gen_prepare_eflags_*()
 * and gen_prepare_cc().  The consumers are outside the visible part of
 * the file, so the field notes below are inferred from the producers.
 */
typedef struct CCPrepare {
    TCGCond cond;      /* comparison to apply; may be NEVER or ALWAYS */
    TCGv reg;          /* first operand (unset for NEVER/ALWAYS) */
    TCGv reg2;         /* second operand, meaningful when use_reg2 is set */
    target_ulong imm;  /* NOTE(review): presumably the immediate compared
                          against when use_reg2 is false; producers here
                          leave it 0 — confirm in the consumers */
    target_ulong mask; /* -1, or the flag bit(s) of reg to test (CC_C, ...) */
    bool use_reg2;     /* compare reg against reg2 */
    bool no_setcond;   /* NOTE(review): set by producers whose reg is handed
                          over with cond NE and mask -1; presumably lets the
                          consumer skip a setcond — confirm */
} CCPrepare;
814 
815 /* compute eflags.C to reg */
816 static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
817 {
818     TCGv t0, t1;
819     int size, shift;
820 
821     switch (s->cc_op) {
822     case CC_OP_SUBB ... CC_OP_SUBQ:
823         /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
824         size = s->cc_op - CC_OP_SUBB;
825         t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
826         /* If no temporary was used, be careful not to alias t1 and t0.  */
827         t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
828         tcg_gen_mov_tl(t0, s->cc_srcT);
829         gen_extu(size, t0);
830         goto add_sub;
831 
832     case CC_OP_ADDB ... CC_OP_ADDQ:
833         /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
834         size = s->cc_op - CC_OP_ADDB;
835         t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
836         t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
837     add_sub:
838         return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
839                              .reg2 = t1, .mask = -1, .use_reg2 = true };
840 
841     case CC_OP_LOGICB ... CC_OP_LOGICQ:
842     case CC_OP_CLR:
843     case CC_OP_POPCNT:
844         return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
845 
846     case CC_OP_INCB ... CC_OP_INCQ:
847     case CC_OP_DECB ... CC_OP_DECQ:
848         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
849                              .mask = -1, .no_setcond = true };
850 
851     case CC_OP_SHLB ... CC_OP_SHLQ:
852         /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
853         size = s->cc_op - CC_OP_SHLB;
854         shift = (8 << size) - 1;
855         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
856                              .mask = (target_ulong)1 << shift };
857 
858     case CC_OP_MULB ... CC_OP_MULQ:
859         return (CCPrepare) { .cond = TCG_COND_NE,
860                              .reg = cpu_cc_src, .mask = -1 };
861 
862     case CC_OP_BMILGB ... CC_OP_BMILGQ:
863         size = s->cc_op - CC_OP_BMILGB;
864         t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
865         return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
866 
867     case CC_OP_ADCX:
868     case CC_OP_ADCOX:
869         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
870                              .mask = -1, .no_setcond = true };
871 
872     case CC_OP_EFLAGS:
873     case CC_OP_SARB ... CC_OP_SARQ:
874         /* CC_SRC & 1 */
875         return (CCPrepare) { .cond = TCG_COND_NE,
876                              .reg = cpu_cc_src, .mask = CC_C };
877 
878     default:
879        /* The need to compute only C from CC_OP_DYNAMIC is important
880           in efficiently implementing e.g. INC at the start of a TB.  */
881        gen_update_cc_op(s);
882        gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
883                                cpu_cc_src2, cpu_cc_op);
884        return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
885                             .mask = -1, .no_setcond = true };
886     }
887 }
888 
889 /* compute eflags.P to reg */
890 static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
891 {
892     gen_compute_eflags(s);
893     return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
894                          .mask = CC_P };
895 }
896 
897 /* compute eflags.S to reg */
898 static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
899 {
900     switch (s->cc_op) {
901     case CC_OP_DYNAMIC:
902         gen_compute_eflags(s);
903         /* FALLTHRU */
904     case CC_OP_EFLAGS:
905     case CC_OP_ADCX:
906     case CC_OP_ADOX:
907     case CC_OP_ADCOX:
908         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
909                              .mask = CC_S };
910     case CC_OP_CLR:
911     case CC_OP_POPCNT:
912         return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
913     default:
914         {
915             MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
916             TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
917             return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
918         }
919     }
920 }
921 
922 /* compute eflags.O to reg */
923 static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
924 {
925     switch (s->cc_op) {
926     case CC_OP_ADOX:
927     case CC_OP_ADCOX:
928         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
929                              .mask = -1, .no_setcond = true };
930     case CC_OP_CLR:
931     case CC_OP_POPCNT:
932         return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
933     default:
934         gen_compute_eflags(s);
935         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
936                              .mask = CC_O };
937     }
938 }
939 
940 /* compute eflags.Z to reg */
941 static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
942 {
943     switch (s->cc_op) {
944     case CC_OP_DYNAMIC:
945         gen_compute_eflags(s);
946         /* FALLTHRU */
947     case CC_OP_EFLAGS:
948     case CC_OP_ADCX:
949     case CC_OP_ADOX:
950     case CC_OP_ADCOX:
951         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
952                              .mask = CC_Z };
953     case CC_OP_CLR:
954         return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
955     case CC_OP_POPCNT:
956         return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
957                              .mask = -1 };
958     default:
959         {
960             MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
961             TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
962             return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
963         }
964     }
965 }
966 
/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
969 static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
970 {
971     int inv, jcc_op, cond;
972     MemOp size;
973     CCPrepare cc;
974     TCGv t0;
975 
976     inv = b & 1;
977     jcc_op = (b >> 1) & 7;
978 
979     switch (s->cc_op) {
980     case CC_OP_SUBB ... CC_OP_SUBQ:
981         /* We optimize relational operators for the cmp/jcc case.  */
982         size = s->cc_op - CC_OP_SUBB;
983         switch (jcc_op) {
984         case JCC_BE:
985             tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
986             gen_extu(size, s->tmp4);
987             t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
988             cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
989                                .reg2 = t0, .mask = -1, .use_reg2 = true };
990             break;
991 
992         case JCC_L:
993             cond = TCG_COND_LT;
994             goto fast_jcc_l;
995         case JCC_LE:
996             cond = TCG_COND_LE;
997         fast_jcc_l:
998             tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
999             gen_exts(size, s->tmp4);
1000             t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
1001             cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
1002                                .reg2 = t0, .mask = -1, .use_reg2 = true };
1003             break;
1004 
1005         default:
1006             goto slow_jcc;
1007         }
1008         break;
1009 
1010     default:
1011     slow_jcc:
1012         /* This actually generates good code for JC, JZ and JS.  */
1013         switch (jcc_op) {
1014         case JCC_O:
1015             cc = gen_prepare_eflags_o(s, reg);
1016             break;
1017         case JCC_B:
1018             cc = gen_prepare_eflags_c(s, reg);
1019             break;
1020         case JCC_Z:
1021             cc = gen_prepare_eflags_z(s, reg);
1022             break;
1023         case JCC_BE:
1024             gen_compute_eflags(s);
1025             cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
1026                                .mask = CC_Z | CC_C };
1027             break;
1028         case JCC_S:
1029             cc = gen_prepare_eflags_s(s, reg);
1030             break;
1031         case JCC_P:
1032             cc = gen_prepare_eflags_p(s, reg);
1033             break;
1034         case JCC_L:
1035             gen_compute_eflags(s);
1036             if (reg == cpu_cc_src) {
1037                 reg = s->tmp0;
1038             }
1039             tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
1040             tcg_gen_xor_tl(reg, reg, cpu_cc_src);
1041             cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
1042                                .mask = CC_S };
1043             break;
1044         default:
1045         case JCC_LE:
1046             gen_compute_eflags(s);
1047             if (reg == cpu_cc_src) {
1048                 reg = s->tmp0;
1049             }
1050             tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
1051             tcg_gen_xor_tl(reg, reg, cpu_cc_src);
1052             cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
1053                                .mask = CC_S | CC_Z };
1054             break;
1055         }
1056         break;
1057     }
1058 
1059     if (inv) {
1060         cc.cond = tcg_invert_cond(cc.cond);
1061     }
1062     return cc;
1063 }
1064 
1065 static void gen_setcc1(DisasContext *s, int b, TCGv reg)
1066 {
1067     CCPrepare cc = gen_prepare_cc(s, b, reg);
1068 
1069     if (cc.no_setcond) {
1070         if (cc.cond == TCG_COND_EQ) {
1071             tcg_gen_xori_tl(reg, cc.reg, 1);
1072         } else {
1073             tcg_gen_mov_tl(reg, cc.reg);
1074         }
1075         return;
1076     }
1077 
1078     if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
1079         cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
1080         tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
1081         tcg_gen_andi_tl(reg, reg, 1);
1082         return;
1083     }
1084     if (cc.mask != -1) {
1085         tcg_gen_andi_tl(reg, cc.reg, cc.mask);
1086         cc.reg = reg;
1087     }
1088     if (cc.use_reg2) {
1089         tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
1090     } else {
1091         tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
1092     }
1093 }
1094 
1095 static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
1096 {
1097     gen_setcc1(s, JCC_B << 1, reg);
1098 }
1099 
1100 /* generate a conditional jump to label 'l1' according to jump opcode
1101    value 'b'. In the fast case, T0 is guaranted not to be used. */
1102 static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
1103 {
1104     CCPrepare cc = gen_prepare_cc(s, b, s->T0);
1105 
1106     if (cc.mask != -1) {
1107         tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
1108         cc.reg = s->T0;
1109     }
1110     if (cc.use_reg2) {
1111         tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1112     } else {
1113         tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1114     }
1115 }
1116 
1117 /* Generate a conditional jump to label 'l1' according to jump opcode
1118    value 'b'. In the fast case, T0 is guaranted not to be used.
1119    A translation block must end soon.  */
1120 static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
1121 {
1122     CCPrepare cc = gen_prepare_cc(s, b, s->T0);
1123 
1124     gen_update_cc_op(s);
1125     if (cc.mask != -1) {
1126         tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
1127         cc.reg = s->T0;
1128     }
1129     set_cc_op(s, CC_OP_DYNAMIC);
1130     if (cc.use_reg2) {
1131         tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1132     } else {
1133         tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1134     }
1135 }
1136 
1137 /* XXX: does not work with gdbstub "ice" single step - not a
1138    serious problem */
1139 static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
1140 {
1141     TCGLabel *l1 = gen_new_label();
1142     TCGLabel *l2 = gen_new_label();
1143     gen_op_jnz_ecx(s, s->aflag, l1);
1144     gen_set_label(l2);
1145     gen_jmp_tb(s, next_eip, 1);
1146     gen_set_label(l1);
1147     return l2;
1148 }
1149 
1150 static inline void gen_stos(DisasContext *s, MemOp ot)
1151 {
1152     gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
1153     gen_string_movl_A0_EDI(s);
1154     gen_op_st_v(s, ot, s->T0, s->A0);
1155     gen_op_movl_T0_Dshift(s, ot);
1156     gen_op_add_reg_T0(s, s->aflag, R_EDI);
1157 }
1158 
1159 static inline void gen_lods(DisasContext *s, MemOp ot)
1160 {
1161     gen_string_movl_A0_ESI(s);
1162     gen_op_ld_v(s, ot, s->T0, s->A0);
1163     gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
1164     gen_op_movl_T0_Dshift(s, ot);
1165     gen_op_add_reg_T0(s, s->aflag, R_ESI);
1166 }
1167 
1168 static inline void gen_scas(DisasContext *s, MemOp ot)
1169 {
1170     gen_string_movl_A0_EDI(s);
1171     gen_op_ld_v(s, ot, s->T1, s->A0);
1172     gen_op(s, OP_CMPL, ot, R_EAX);
1173     gen_op_movl_T0_Dshift(s, ot);
1174     gen_op_add_reg_T0(s, s->aflag, R_EDI);
1175 }
1176 
1177 static inline void gen_cmps(DisasContext *s, MemOp ot)
1178 {
1179     gen_string_movl_A0_EDI(s);
1180     gen_op_ld_v(s, ot, s->T1, s->A0);
1181     gen_string_movl_A0_ESI(s);
1182     gen_op(s, OP_CMPL, ot, OR_TMP0);
1183     gen_op_movl_T0_Dshift(s, ot);
1184     gen_op_add_reg_T0(s, s->aflag, R_ESI);
1185     gen_op_add_reg_T0(s, s->aflag, R_EDI);
1186 }
1187 
1188 static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
1189 {
1190     if (s->flags & HF_IOBPT_MASK) {
1191 #ifdef CONFIG_USER_ONLY
1192         /* user-mode cpu should not be in IOBPT mode */
1193         g_assert_not_reached();
1194 #else
1195         TCGv_i32 t_size = tcg_const_i32(1 << ot);
1196         TCGv t_next = tcg_const_tl(s->pc - s->cs_base);
1197 
1198         gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
1199         tcg_temp_free_i32(t_size);
1200         tcg_temp_free(t_next);
1201 #endif /* CONFIG_USER_ONLY */
1202     }
1203 }
1204 
1205 static inline void gen_ins(DisasContext *s, MemOp ot)
1206 {
1207     gen_string_movl_A0_EDI(s);
1208     /* Note: we must do this dummy write first to be restartable in
1209        case of page fault. */
1210     tcg_gen_movi_tl(s->T0, 0);
1211     gen_op_st_v(s, ot, s->T0, s->A0);
1212     tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
1213     tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
1214     gen_helper_in_func(ot, s->T0, s->tmp2_i32);
1215     gen_op_st_v(s, ot, s->T0, s->A0);
1216     gen_op_movl_T0_Dshift(s, ot);
1217     gen_op_add_reg_T0(s, s->aflag, R_EDI);
1218     gen_bpt_io(s, s->tmp2_i32, ot);
1219 }
1220 
1221 static inline void gen_outs(DisasContext *s, MemOp ot)
1222 {
1223     gen_string_movl_A0_ESI(s);
1224     gen_op_ld_v(s, ot, s->T0, s->A0);
1225 
1226     tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
1227     tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
1228     tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
1229     gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
1230     gen_op_movl_T0_Dshift(s, ot);
1231     gen_op_add_reg_T0(s, s->aflag, R_ESI);
1232     gen_bpt_io(s, s->tmp2_i32, ot);
1233 }
1234 
/*
 * same method as Valgrind : we generate jumps to current or next
 * instruction
 *
 * GEN_REPZ(op) defines gen_repz_<op>(), which emits a single iteration of
 * a REP-prefixed string instruction:
 *   - call gen_update_cc_op() and emit the ECX test / exit stub through
 *     gen_jz_ecx_string() (the stub continues at next_eip);
 *   - emit the string operation itself with gen_<op>();
 *   - add -1 to ECX (using the address size s->aflag);
 *   - if s->repz_opt is set, branch to the exit stub with gen_op_jz_ecx();
 *   - otherwise jump back to cur_eip so the instruction runs again.
 */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}
1252 
/*
 * GEN_REPZ2(op) is the variant of GEN_REPZ for string operations that
 * also terminate on a flag condition.  In addition to the ECX handling,
 * after gen_<op>() and the ECX decrement it branches to the exit stub with
 * gen_jcc1() on the JCC_Z condition; bit 0 of the jump opcode (the
 * "invert" bit) is nz ^ 1, so 'nz' selects which sense of the Z test ends
 * the loop.
 */
#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}
1270 
/*
 * Instantiate gen_repz_<op>() for every string operation.  The GEN_REPZ2
 * instances (scas, cmps) take the extra 'nz' argument and also test the
 * Z condition after each iteration.
 */
GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)
1278 
1279 static void gen_helper_fp_arith_ST0_FT0(int op)
1280 {
1281     switch (op) {
1282     case 0:
1283         gen_helper_fadd_ST0_FT0(cpu_env);
1284         break;
1285     case 1:
1286         gen_helper_fmul_ST0_FT0(cpu_env);
1287         break;
1288     case 2:
1289         gen_helper_fcom_ST0_FT0(cpu_env);
1290         break;
1291     case 3:
1292         gen_helper_fcom_ST0_FT0(cpu_env);
1293         break;
1294     case 4:
1295         gen_helper_fsub_ST0_FT0(cpu_env);
1296         break;
1297     case 5:
1298         gen_helper_fsubr_ST0_FT0(cpu_env);
1299         break;
1300     case 6:
1301         gen_helper_fdiv_ST0_FT0(cpu_env);
1302         break;
1303     case 7:
1304         gen_helper_fdivr_ST0_FT0(cpu_env);
1305         break;
1306     }
1307 }
1308 
1309 /* NOTE the exception in "r" op ordering */
1310 static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
1311 {
1312     TCGv_i32 tmp = tcg_const_i32(opreg);
1313     switch (op) {
1314     case 0:
1315         gen_helper_fadd_STN_ST0(cpu_env, tmp);
1316         break;
1317     case 1:
1318         gen_helper_fmul_STN_ST0(cpu_env, tmp);
1319         break;
1320     case 4:
1321         gen_helper_fsubr_STN_ST0(cpu_env, tmp);
1322         break;
1323     case 5:
1324         gen_helper_fsub_STN_ST0(cpu_env, tmp);
1325         break;
1326     case 6:
1327         gen_helper_fdivr_STN_ST0(cpu_env, tmp);
1328         break;
1329     case 7:
1330         gen_helper_fdiv_STN_ST0(cpu_env, tmp);
1331         break;
1332     }
1333 }
1334 
1335 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
1336 {
1337     gen_update_cc_op(s);
1338     gen_jmp_im(s, cur_eip);
1339     gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
1340     s->base.is_jmp = DISAS_NORETURN;
1341 }
1342 
1343 /* Generate #UD for the current instruction.  The assumption here is that
1344    the instruction is known, but it isn't allowed in the current cpu mode.  */
1345 static void gen_illegal_opcode(DisasContext *s)
1346 {
1347     gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
1348 }
1349 
1350 /* Generate #GP for the current instruction. */
1351 static void gen_exception_gpf(DisasContext *s)
1352 {
1353     gen_exception(s, EXCP0D_GPF, s->pc_start - s->cs_base);
1354 }
1355 
1356 /* Check for cpl == 0; if not, raise #GP and return false. */
1357 static bool check_cpl0(DisasContext *s)
1358 {
1359     if (CPL(s) == 0) {
1360         return true;
1361     }
1362     gen_exception_gpf(s);
1363     return false;
1364 }
1365 
1366 /* If vm86, check for iopl == 3; if not, raise #GP and return false. */
1367 static bool check_vm86_iopl(DisasContext *s)
1368 {
1369     if (!VM86(s) || IOPL(s) == 3) {
1370         return true;
1371     }
1372     gen_exception_gpf(s);
1373     return false;
1374 }
1375 
1376 /* Check for iopl allowing access; if not, raise #GP and return false. */
1377 static bool check_iopl(DisasContext *s)
1378 {
1379     if (VM86(s) ? IOPL(s) == 3 : CPL(s) <= IOPL(s)) {
1380         return true;
1381     }
1382     gen_exception_gpf(s);
1383     return false;
1384 }
1385 
/*
 * Emit the two-operand ALU operation 'op' (OP_*) on operands of size 'ot'.
 * The second operand is taken from T1.  The first operand / destination is
 * register 'd', or, if d == OR_TMP0, it means memory operand (address in
 * A0).
 *
 * With a LOCK prefix, a register destination is rejected with
 * gen_illegal_opcode(); a memory destination is updated through a TCG
 * atomic read-modify-write op, so T0 is not preloaded in that case.
 * Every operation ends by recording the matching lazy cc_op.
 */
static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
{
    if (d != OR_TMP0) {
        if (s1->prefix & PREFIX_LOCK) {
            /* Lock prefix when destination is not memory.  */
            gen_illegal_opcode(s1);
            return;
        }
        gen_op_mov_v_reg(s1, ot, s1->T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        /* Plain memory operand: fetch it; locked forms use atomics below.  */
        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
    }
    switch(op) {
    case OP_ADCL:
        /* tmp4 = carry-in.  */
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            /* Atomically add (carry + T1) to memory; T0 = new value.  */
            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        /* tmp4 = borrow-in.  */
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            /* Atomically add -(T1 + borrow) to memory; T0 = new value.  */
            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
            tcg_gen_neg_tl(s1->T0, s1->T0);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
            /*
             * Atomically add -T1; the fetched old value is kept in cc_srcT
             * and the result is recomputed from it into T0.
             */
            tcg_gen_neg_tl(s1->T0, s1->T1);
            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
        } else {
            /* Remember the original first operand in cc_srcT.  */
            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    /* Any 'op' without its own case label is handled as OP_ANDL.  */
    default:
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        /*
         * Compare: only the CC variables are written, nothing is stored.
         * NOTE(review): with LOCK and a memory operand T0 is not loaded
         * above; presumably callers never reach this case in that state -
         * confirm.
         */
        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}
1496 
1497 /* if d == OR_TMP0, it means memory operand (address in A0) */
1498 static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
1499 {
1500     if (s1->prefix & PREFIX_LOCK) {
1501         if (d != OR_TMP0) {
1502             /* Lock prefix when destination is not memory */
1503             gen_illegal_opcode(s1);
1504             return;
1505         }
1506         tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
1507         tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
1508                                     s1->mem_index, ot | MO_LE);
1509     } else {
1510         if (d != OR_TMP0) {
1511             gen_op_mov_v_reg(s1, ot, s1->T0, d);
1512         } else {
1513             gen_op_ld_v(s1, ot, s1->T0, s1->A0);
1514         }
1515         tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
1516         gen_op_st_rm_T0_A0(s1, ot, d);
1517     }
1518 
1519     gen_compute_eflags_c(s1, cpu_cc_src);
1520     tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
1521     set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
1522 }
1523 
/*
 * Update the lazy condition-code state after a shift by a run-time count.
 *
 * 'result' is the shifted value, 'shm1' the operand shifted by count - 1
 * (see gen_shift_rm_T1), 'count' the already masked shift count and
 * 'is_right' selects CC_OP_SARB vs. CC_OP_SHLB (plus 'ot') as the new
 * cc_op.  When the count is zero at run time the previous flags state is
 * kept, so all stores into live CC variables and into cpu_cc_op are
 * conditional on count != 0.
 */
static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        /* The old value already lives in cpu_cc_op.  */
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
        oldop = s->tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}
1568 
1569 static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
1570                             int is_right, int is_arith)
1571 {
1572     target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1573 
1574     /* load */
1575     if (op1 == OR_TMP0) {
1576         gen_op_ld_v(s, ot, s->T0, s->A0);
1577     } else {
1578         gen_op_mov_v_reg(s, ot, s->T0, op1);
1579     }
1580 
1581     tcg_gen_andi_tl(s->T1, s->T1, mask);
1582     tcg_gen_subi_tl(s->tmp0, s->T1, 1);
1583 
1584     if (is_right) {
1585         if (is_arith) {
1586             gen_exts(ot, s->T0);
1587             tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
1588             tcg_gen_sar_tl(s->T0, s->T0, s->T1);
1589         } else {
1590             gen_extu(ot, s->T0);
1591             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1592             tcg_gen_shr_tl(s->T0, s->T0, s->T1);
1593         }
1594     } else {
1595         tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1596         tcg_gen_shl_tl(s->T0, s->T0, s->T1);
1597     }
1598 
1599     /* store */
1600     gen_op_st_rm_T0_A0(s, ot, op1);
1601 
1602     gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
1603 }
1604 
1605 static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1606                             int is_right, int is_arith)
1607 {
1608     int mask = (ot == MO_64 ? 0x3f : 0x1f);
1609 
1610     /* load */
1611     if (op1 == OR_TMP0)
1612         gen_op_ld_v(s, ot, s->T0, s->A0);
1613     else
1614         gen_op_mov_v_reg(s, ot, s->T0, op1);
1615 
1616     op2 &= mask;
1617     if (op2 != 0) {
1618         if (is_right) {
1619             if (is_arith) {
1620                 gen_exts(ot, s->T0);
1621                 tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
1622                 tcg_gen_sari_tl(s->T0, s->T0, op2);
1623             } else {
1624                 gen_extu(ot, s->T0);
1625                 tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
1626                 tcg_gen_shri_tl(s->T0, s->T0, op2);
1627             }
1628         } else {
1629             tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
1630             tcg_gen_shli_tl(s->T0, s->T0, op2);
1631         }
1632     }
1633 
1634     /* store */
1635     gen_op_st_rm_T0_A0(s, ot, op1);
1636 
1637     /* update eflags if non zero shift */
1638     if (op2 != 0) {
1639         tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
1640         tcg_gen_mov_tl(cpu_cc_dst, s->T0);
1641         set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1642     }
1643 }
1644 
/*
 * Emit a rotate of the 'ot'-sized operand (register op1, or the memory
 * operand at A0 when op1 == OR_TMP0) by the run-time count held in T1;
 * 'is_right' selects the direction.  The count is masked to 6 bits for
 * MO_64 and to 5 bits otherwise.  C and O are computed into CC_DST and
 * CC_SRC2, and cpu_cc_op is set at run time to CC_OP_ADCOX or, for a zero
 * count, to CC_OP_EFLAGS.
 */
static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
    TCGv_i32 t0, t1;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    tcg_gen_andi_tl(s->T1, s->T1, mask);

    switch (ot) {
    case MO_8:
        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
        tcg_gen_ext8u_tl(s->T0, s->T0);
        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
        goto do_long;
    case MO_16:
        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
        goto do_long;
    /*
     * With TARGET_X86_64 the label leads into the explicit 32-bit rotate
     * below; without it, it falls into the default (target-long) rotate.
     */
    do_long:
#ifdef TARGET_X86_64
    case MO_32:
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
        if (is_right) {
            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        } else {
            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        }
        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
        break;
#endif
    default:
        if (is_right) {
            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
        } else {
            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
        }
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* We'll need the flags computed into CC_SRC.  */
    gen_compute_eflags(s);

    /* The value that was "rotated out" is now present at the other end
       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
       since we've computed the flags into CC_SRC, these variables are
       currently dead.  */
    if (is_right) {
        /* C = bit 'mask' of the result, O = that bit xor bit 'mask - 1'.  */
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
    } else {
        /* C = bit 0 of the result, O = bit 'mask' xor C.  */
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
    }
    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);

    /* Now conditionally store the new CC_OP value.  If the shift count
       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
       Otherwise reuse CC_OP_ADCOX which have the C and O flags split out
       exactly as we computed above.  */
    t0 = tcg_const_i32(0);
    t1 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(t1, s->T1);
    tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
    tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                        s->tmp2_i32, s->tmp3_i32);
    tcg_temp_free_i32(t0);
    tcg_temp_free_i32(t1);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}
1729 
/*
 * Emit a rotate of the 'ot'-sized operand (register op1, or the memory
 * operand at A0 when op1 == OR_TMP0) by the immediate count 'op2';
 * 'is_right' selects the direction.  The count is masked to 6 bits for
 * MO_64 and to 5 bits otherwise.  A masked count of zero still performs
 * the load and the store but leaves the flags state untouched; otherwise
 * C and O are computed into CC_DST / CC_SRC2 and cc_op becomes
 * CC_OP_ADCOX.
 */
static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
                          int is_right)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);
    int shift;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        switch (ot) {
#ifdef TARGET_X86_64
        case MO_32:
            /* Rotate within 32 bits, then zero-extend back into T0.  */
            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
            if (is_right) {
                tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
            } else {
                tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
            }
            tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
            break;
#endif
        /* Target-long sized operands; MO_8 / MO_16 are matched below.  */
        default:
            if (is_right) {
                tcg_gen_rotri_tl(s->T0, s->T0, op2);
            } else {
                tcg_gen_rotli_tl(s->T0, s->T0, op2);
            }
            break;
        case MO_8:
            mask = 7;
            goto do_shifts;
        case MO_16:
            mask = 15;
        do_shifts:
            /*
             * Narrow operands: build the rotate from two shifts of the
             * zero-extended value.  'mask' now is (operand width - 1), which
             * the flag computation below relies on as well.  A right rotate
             * is expressed as a left rotate by (width - count).
             */
            shift = op2 & mask;
            if (is_right) {
                shift = mask + 1 - shift;
            }
            gen_extu(ot, s->T0);
            tcg_gen_shli_tl(s->tmp0, s->T0, shift);
            tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
            tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
            break;
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    if (op2 != 0) {
        /* Compute the flags into CC_SRC.  */
        gen_compute_eflags(s);

        /* The value that was "rotated out" is now present at the other end
           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
           since we've computed the flags into CC_SRC, these variables are
           currently dead.  */
        if (is_right) {
            /* C = bit 'mask' of the result, O = that bit xor bit 'mask - 1'.  */
            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
            tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
        } else {
            /* C = bit 0 of the result, O = bit 'mask' xor C.  */
            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
        }
        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
        set_cc_op(s, CC_OP_ADCOX);
    }
}
1806 
1807 /* XXX: add faster immediate = 1 case */
1808 static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
1809                            int is_right)
1810 {
1811     gen_compute_eflags(s);
1812     assert(s->cc_op == CC_OP_EFLAGS);
1813 
1814     /* load */
1815     if (op1 == OR_TMP0)
1816         gen_op_ld_v(s, ot, s->T0, s->A0);
1817     else
1818         gen_op_mov_v_reg(s, ot, s->T0, op1);
1819 
1820     if (is_right) {
1821         switch (ot) {
1822         case MO_8:
1823             gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1824             break;
1825         case MO_16:
1826             gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1827             break;
1828         case MO_32:
1829             gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1830             break;
1831 #ifdef TARGET_X86_64
1832         case MO_64:
1833             gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1834             break;
1835 #endif
1836         default:
1837             tcg_abort();
1838         }
1839     } else {
1840         switch (ot) {
1841         case MO_8:
1842             gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1843             break;
1844         case MO_16:
1845             gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1846             break;
1847         case MO_32:
1848             gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1849             break;
1850 #ifdef TARGET_X86_64
1851         case MO_64:
1852             gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1853             break;
1854 #endif
1855         default:
1856             tcg_abort();
1857         }
1858     }
1859     /* store */
1860     gen_op_st_rm_T0_A0(s, ot, op1);
1861 }
1862 
1863 /* XXX: add faster immediate case */
1864 static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
1865                              bool is_right, TCGv count_in)
1866 {
1867     target_ulong mask = (ot == MO_64 ? 63 : 31);
1868     TCGv count;
1869 
1870     /* load */
1871     if (op1 == OR_TMP0) {
1872         gen_op_ld_v(s, ot, s->T0, s->A0);
1873     } else {
1874         gen_op_mov_v_reg(s, ot, s->T0, op1);
1875     }
1876 
1877     count = tcg_temp_new();
1878     tcg_gen_andi_tl(count, count_in, mask);
1879 
1880     switch (ot) {
1881     case MO_16:
1882         /* Note: we implement the Intel behaviour for shift count > 16.
1883            This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1884            portion by constructing it as a 32-bit value.  */
1885         if (is_right) {
1886             tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
1887             tcg_gen_mov_tl(s->T1, s->T0);
1888             tcg_gen_mov_tl(s->T0, s->tmp0);
1889         } else {
1890             tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
1891         }
1892         /*
1893          * If TARGET_X86_64 defined then fall through into MO_32 case,
1894          * otherwise fall through default case.
1895          */
1896     case MO_32:
1897 #ifdef TARGET_X86_64
1898         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1899         tcg_gen_subi_tl(s->tmp0, count, 1);
1900         if (is_right) {
1901             tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
1902             tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
1903             tcg_gen_shr_i64(s->T0, s->T0, count);
1904         } else {
1905             tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
1906             tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
1907             tcg_gen_shl_i64(s->T0, s->T0, count);
1908             tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
1909             tcg_gen_shri_i64(s->T0, s->T0, 32);
1910         }
1911         break;
1912 #endif
1913     default:
1914         tcg_gen_subi_tl(s->tmp0, count, 1);
1915         if (is_right) {
1916             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1917 
1918             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1919             tcg_gen_shr_tl(s->T0, s->T0, count);
1920             tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
1921         } else {
1922             tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1923             if (ot == MO_16) {
1924                 /* Only needed if count > 16, for Intel behaviour.  */
1925                 tcg_gen_subfi_tl(s->tmp4, 33, count);
1926                 tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
1927                 tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
1928             }
1929 
1930             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1931             tcg_gen_shl_tl(s->T0, s->T0, count);
1932             tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
1933         }
1934         tcg_gen_movi_tl(s->tmp4, 0);
1935         tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
1936                            s->tmp4, s->T1);
1937         tcg_gen_or_tl(s->T0, s->T0, s->T1);
1938         break;
1939     }
1940 
1941     /* store */
1942     gen_op_st_rm_T0_A0(s, ot, op1);
1943 
1944     gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
1945     tcg_temp_free(count);
1946 }
1947 
1948 static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
1949 {
1950     if (s != OR_TMP1)
1951         gen_op_mov_v_reg(s1, ot, s1->T1, s);
1952     switch(op) {
1953     case OP_ROL:
1954         gen_rot_rm_T1(s1, ot, d, 0);
1955         break;
1956     case OP_ROR:
1957         gen_rot_rm_T1(s1, ot, d, 1);
1958         break;
1959     case OP_SHL:
1960     case OP_SHL1:
1961         gen_shift_rm_T1(s1, ot, d, 0, 0);
1962         break;
1963     case OP_SHR:
1964         gen_shift_rm_T1(s1, ot, d, 1, 0);
1965         break;
1966     case OP_SAR:
1967         gen_shift_rm_T1(s1, ot, d, 1, 1);
1968         break;
1969     case OP_RCL:
1970         gen_rotc_rm_T1(s1, ot, d, 0);
1971         break;
1972     case OP_RCR:
1973         gen_rotc_rm_T1(s1, ot, d, 1);
1974         break;
1975     }
1976 }
1977 
1978 static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
1979 {
1980     switch(op) {
1981     case OP_ROL:
1982         gen_rot_rm_im(s1, ot, d, c, 0);
1983         break;
1984     case OP_ROR:
1985         gen_rot_rm_im(s1, ot, d, c, 1);
1986         break;
1987     case OP_SHL:
1988     case OP_SHL1:
1989         gen_shift_rm_im(s1, ot, d, c, 0, 0);
1990         break;
1991     case OP_SHR:
1992         gen_shift_rm_im(s1, ot, d, c, 1, 0);
1993         break;
1994     case OP_SAR:
1995         gen_shift_rm_im(s1, ot, d, c, 1, 1);
1996         break;
1997     default:
1998         /* currently not optimized */
1999         tcg_gen_movi_tl(s1->T1, c);
2000         gen_shift(s1, op, ot, d, OR_TMP1);
2001         break;
2002     }
2003 }
2004 
2005 #define X86_MAX_INSN_LENGTH 15
2006 
2007 static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
2008 {
2009     uint64_t pc = s->pc;
2010 
2011     s->pc += num_bytes;
2012     if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
2013         /* If the instruction's 16th byte is on a different page than the 1st, a
2014          * page fault on the second page wins over the general protection fault
2015          * caused by the instruction being too long.
2016          * This can happen even if the operand is only one byte long!
2017          */
2018         if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
2019             volatile uint8_t unused =
2020                 cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
2021             (void) unused;
2022         }
2023         siglongjmp(s->jmpbuf, 1);
2024     }
2025 
2026     return pc;
2027 }
2028 
2029 static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
2030 {
2031     return translator_ldub(env, advance_pc(env, s, 1));
2032 }
2033 
2034 static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
2035 {
2036     return translator_ldsw(env, advance_pc(env, s, 2));
2037 }
2038 
2039 static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
2040 {
2041     return translator_lduw(env, advance_pc(env, s, 2));
2042 }
2043 
2044 static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
2045 {
2046     return translator_ldl(env, advance_pc(env, s, 4));
2047 }
2048 
#ifdef TARGET_X86_64
/* Fetch a 64-bit code value and advance s->pc by eight bytes.  */
static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
{
    uint64_t addr = advance_pc(env, s, 8);

    return translator_ldq(env, addr);
}
#endif
2055 
2056 /* Decompose an address.  */
2057 
/*
 * Components of a decoded ModRM memory operand, as produced by
 * gen_lea_modrm_0() and consumed by gen_lea_modrm_1().
 */
typedef struct AddressParts {
    int def_seg;        /* default segment: R_DS, or R_SS for (E)SP/(E)BP bases */
    int base;           /* base register index; negative when there is none
                           (-1 plain displacement, -2 displacement already
                           combined with s->pc + s->rip_offset) */
    int index;          /* index register index, or -1 for no index */
    int scale;          /* left-shift count applied to the index register */
    target_long disp;   /* sign-extended displacement */
} AddressParts;
2065 
2066 static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
2067                                     int modrm)
2068 {
2069     int def_seg, base, index, scale, mod, rm;
2070     target_long disp;
2071     bool havesib;
2072 
2073     def_seg = R_DS;
2074     index = -1;
2075     scale = 0;
2076     disp = 0;
2077 
2078     mod = (modrm >> 6) & 3;
2079     rm = modrm & 7;
2080     base = rm | REX_B(s);
2081 
2082     if (mod == 3) {
2083         /* Normally filtered out earlier, but including this path
2084            simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
2085         goto done;
2086     }
2087 
2088     switch (s->aflag) {
2089     case MO_64:
2090     case MO_32:
2091         havesib = 0;
2092         if (rm == 4) {
2093             int code = x86_ldub_code(env, s);
2094             scale = (code >> 6) & 3;
2095             index = ((code >> 3) & 7) | REX_X(s);
2096             if (index == 4) {
2097                 index = -1;  /* no index */
2098             }
2099             base = (code & 7) | REX_B(s);
2100             havesib = 1;
2101         }
2102 
2103         switch (mod) {
2104         case 0:
2105             if ((base & 7) == 5) {
2106                 base = -1;
2107                 disp = (int32_t)x86_ldl_code(env, s);
2108                 if (CODE64(s) && !havesib) {
2109                     base = -2;
2110                     disp += s->pc + s->rip_offset;
2111                 }
2112             }
2113             break;
2114         case 1:
2115             disp = (int8_t)x86_ldub_code(env, s);
2116             break;
2117         default:
2118         case 2:
2119             disp = (int32_t)x86_ldl_code(env, s);
2120             break;
2121         }
2122 
2123         /* For correct popl handling with esp.  */
2124         if (base == R_ESP && s->popl_esp_hack) {
2125             disp += s->popl_esp_hack;
2126         }
2127         if (base == R_EBP || base == R_ESP) {
2128             def_seg = R_SS;
2129         }
2130         break;
2131 
2132     case MO_16:
2133         if (mod == 0) {
2134             if (rm == 6) {
2135                 base = -1;
2136                 disp = x86_lduw_code(env, s);
2137                 break;
2138             }
2139         } else if (mod == 1) {
2140             disp = (int8_t)x86_ldub_code(env, s);
2141         } else {
2142             disp = (int16_t)x86_lduw_code(env, s);
2143         }
2144 
2145         switch (rm) {
2146         case 0:
2147             base = R_EBX;
2148             index = R_ESI;
2149             break;
2150         case 1:
2151             base = R_EBX;
2152             index = R_EDI;
2153             break;
2154         case 2:
2155             base = R_EBP;
2156             index = R_ESI;
2157             def_seg = R_SS;
2158             break;
2159         case 3:
2160             base = R_EBP;
2161             index = R_EDI;
2162             def_seg = R_SS;
2163             break;
2164         case 4:
2165             base = R_ESI;
2166             break;
2167         case 5:
2168             base = R_EDI;
2169             break;
2170         case 6:
2171             base = R_EBP;
2172             def_seg = R_SS;
2173             break;
2174         default:
2175         case 7:
2176             base = R_EBX;
2177             break;
2178         }
2179         break;
2180 
2181     default:
2182         tcg_abort();
2183     }
2184 
2185  done:
2186     return (AddressParts){ def_seg, base, index, scale, disp };
2187 }
2188 
2189 /* Compute the address, with a minimum number of TCG ops.  */
2190 static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2191 {
2192     TCGv ea = NULL;
2193 
2194     if (a.index >= 0) {
2195         if (a.scale == 0) {
2196             ea = cpu_regs[a.index];
2197         } else {
2198             tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2199             ea = s->A0;
2200         }
2201         if (a.base >= 0) {
2202             tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2203             ea = s->A0;
2204         }
2205     } else if (a.base >= 0) {
2206         ea = cpu_regs[a.base];
2207     }
2208     if (!ea) {
2209         tcg_gen_movi_tl(s->A0, a.disp);
2210         ea = s->A0;
2211     } else if (a.disp != 0) {
2212         tcg_gen_addi_tl(s->A0, ea, a.disp);
2213         ea = s->A0;
2214     }
2215 
2216     return ea;
2217 }
2218 
2219 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2220 {
2221     AddressParts a = gen_lea_modrm_0(env, s, modrm);
2222     TCGv ea = gen_lea_modrm_1(s, a);
2223     gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2224 }
2225 
2226 static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2227 {
2228     (void)gen_lea_modrm_0(env, s, modrm);
2229 }
2230 
2231 /* Used for BNDCL, BNDCU, BNDCN.  */
2232 static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2233                       TCGCond cond, TCGv_i64 bndv)
2234 {
2235     TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
2236 
2237     tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
2238     if (!CODE64(s)) {
2239         tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
2240     }
2241     tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
2242     tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
2243     gen_helper_bndck(cpu_env, s->tmp2_i32);
2244 }
2245 
2246 /* used for LEA and MOV AX, mem */
2247 static void gen_add_A0_ds_seg(DisasContext *s)
2248 {
2249     gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
2250 }
2251 
2252 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2253    OR_TMP0 */
2254 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2255                            MemOp ot, int reg, int is_store)
2256 {
2257     int mod, rm;
2258 
2259     mod = (modrm >> 6) & 3;
2260     rm = (modrm & 7) | REX_B(s);
2261     if (mod == 3) {
2262         if (is_store) {
2263             if (reg != OR_TMP0)
2264                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2265             gen_op_mov_reg_v(s, ot, rm, s->T0);
2266         } else {
2267             gen_op_mov_v_reg(s, ot, s->T0, rm);
2268             if (reg != OR_TMP0)
2269                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2270         }
2271     } else {
2272         gen_lea_modrm(env, s, modrm);
2273         if (is_store) {
2274             if (reg != OR_TMP0)
2275                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2276             gen_op_st_v(s, ot, s->T0, s->A0);
2277         } else {
2278             gen_op_ld_v(s, ot, s->T0, s->A0);
2279             if (reg != OR_TMP0)
2280                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2281         }
2282     }
2283 }
2284 
2285 static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot)
2286 {
2287     uint32_t ret;
2288 
2289     switch (ot) {
2290     case MO_8:
2291         ret = x86_ldub_code(env, s);
2292         break;
2293     case MO_16:
2294         ret = x86_lduw_code(env, s);
2295         break;
2296     case MO_32:
2297 #ifdef TARGET_X86_64
2298     case MO_64:
2299 #endif
2300         ret = x86_ldl_code(env, s);
2301         break;
2302     default:
2303         tcg_abort();
2304     }
2305     return ret;
2306 }
2307 
2308 static inline int insn_const_size(MemOp ot)
2309 {
2310     if (ot <= MO_32) {
2311         return 1 << ot;
2312     } else {
2313         return 4;
2314     }
2315 }
2316 
2317 static void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2318 {
2319     target_ulong pc = s->cs_base + eip;
2320 
2321     if (translator_use_goto_tb(&s->base, pc))  {
2322         /* jump to same page: we can use a direct jump */
2323         tcg_gen_goto_tb(tb_num);
2324         gen_jmp_im(s, eip);
2325         tcg_gen_exit_tb(s->base.tb, tb_num);
2326         s->base.is_jmp = DISAS_NORETURN;
2327     } else {
2328         /* jump to another page */
2329         gen_jmp_im(s, eip);
2330         gen_jr(s, s->tmp0);
2331     }
2332 }
2333 
2334 static inline void gen_jcc(DisasContext *s, int b,
2335                            target_ulong val, target_ulong next_eip)
2336 {
2337     TCGLabel *l1, *l2;
2338 
2339     if (s->jmp_opt) {
2340         l1 = gen_new_label();
2341         gen_jcc1(s, b, l1);
2342 
2343         gen_goto_tb(s, 0, next_eip);
2344 
2345         gen_set_label(l1);
2346         gen_goto_tb(s, 1, val);
2347     } else {
2348         l1 = gen_new_label();
2349         l2 = gen_new_label();
2350         gen_jcc1(s, b, l1);
2351 
2352         gen_jmp_im(s, next_eip);
2353         tcg_gen_br(l2);
2354 
2355         gen_set_label(l1);
2356         gen_jmp_im(s, val);
2357         gen_set_label(l2);
2358         gen_eob(s);
2359     }
2360 }
2361 
2362 static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b,
2363                         int modrm, int reg)
2364 {
2365     CCPrepare cc;
2366 
2367     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2368 
2369     cc = gen_prepare_cc(s, b, s->T1);
2370     if (cc.mask != -1) {
2371         TCGv t0 = tcg_temp_new();
2372         tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2373         cc.reg = t0;
2374     }
2375     if (!cc.use_reg2) {
2376         cc.reg2 = tcg_const_tl(cc.imm);
2377     }
2378 
2379     tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
2380                        s->T0, cpu_regs[reg]);
2381     gen_op_mov_reg_v(s, ot, reg, s->T0);
2382 
2383     if (cc.mask != -1) {
2384         tcg_temp_free(cc.reg);
2385     }
2386     if (!cc.use_reg2) {
2387         tcg_temp_free(cc.reg2);
2388     }
2389 }
2390 
2391 static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg)
2392 {
2393     tcg_gen_ld32u_tl(s->T0, cpu_env,
2394                      offsetof(CPUX86State,segs[seg_reg].selector));
2395 }
2396 
2397 static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg)
2398 {
2399     tcg_gen_ext16u_tl(s->T0, s->T0);
2400     tcg_gen_st32_tl(s->T0, cpu_env,
2401                     offsetof(CPUX86State,segs[seg_reg].selector));
2402     tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
2403 }
2404 
2405 /* move T0 to seg_reg and compute if the CPU state may change. Never
2406    call this function with seg_reg == R_CS */
2407 static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
2408 {
2409     if (PE(s) && !VM86(s)) {
2410         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
2411         gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
2412         /* abort translation because the addseg value may change or
2413            because ss32 may change. For R_SS, translation must always
2414            stop as a special handling must be done to disable hardware
2415            interrupts for the next instruction */
2416         if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) {
2417             s->base.is_jmp = DISAS_TOO_MANY;
2418         }
2419     } else {
2420         gen_op_movl_seg_T0_vm(s, seg_reg);
2421         if (seg_reg == R_SS) {
2422             s->base.is_jmp = DISAS_TOO_MANY;
2423         }
2424     }
2425 }
2426 
2427 static void gen_svm_check_intercept(DisasContext *s, uint32_t type)
2428 {
2429     /* no SVM activated; fast case */
2430     if (likely(!GUEST(s))) {
2431         return;
2432     }
2433     gen_helper_svm_check_intercept(cpu_env, tcg_constant_i32(type));
2434 }
2435 
2436 static inline void gen_stack_update(DisasContext *s, int addend)
2437 {
2438     gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
2439 }
2440 
2441 /* Generate a push. It depends on ss32, addseg and dflag.  */
2442 static void gen_push_v(DisasContext *s, TCGv val)
2443 {
2444     MemOp d_ot = mo_pushpop(s, s->dflag);
2445     MemOp a_ot = mo_stacksize(s);
2446     int size = 1 << d_ot;
2447     TCGv new_esp = s->A0;
2448 
2449     tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
2450 
2451     if (!CODE64(s)) {
2452         if (ADDSEG(s)) {
2453             new_esp = s->tmp4;
2454             tcg_gen_mov_tl(new_esp, s->A0);
2455         }
2456         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2457     }
2458 
2459     gen_op_st_v(s, d_ot, val, s->A0);
2460     gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
2461 }
2462 
2463 /* two step pop is necessary for precise exceptions */
2464 static MemOp gen_pop_T0(DisasContext *s)
2465 {
2466     MemOp d_ot = mo_pushpop(s, s->dflag);
2467 
2468     gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2469     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2470 
2471     return d_ot;
2472 }
2473 
2474 static inline void gen_pop_update(DisasContext *s, MemOp ot)
2475 {
2476     gen_stack_update(s, 1 << ot);
2477 }
2478 
2479 static inline void gen_stack_A0(DisasContext *s)
2480 {
2481     gen_lea_v_seg(s, SS32(s) ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2482 }
2483 
2484 static void gen_pusha(DisasContext *s)
2485 {
2486     MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2487     MemOp d_ot = s->dflag;
2488     int size = 1 << d_ot;
2489     int i;
2490 
2491     for (i = 0; i < 8; i++) {
2492         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2493         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2494         gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2495     }
2496 
2497     gen_stack_update(s, -8 * size);
2498 }
2499 
2500 static void gen_popa(DisasContext *s)
2501 {
2502     MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2503     MemOp d_ot = s->dflag;
2504     int size = 1 << d_ot;
2505     int i;
2506 
2507     for (i = 0; i < 8; i++) {
2508         /* ESP is not reloaded */
2509         if (7 - i == R_ESP) {
2510             continue;
2511         }
2512         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2513         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2514         gen_op_ld_v(s, d_ot, s->T0, s->A0);
2515         gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2516     }
2517 
2518     gen_stack_update(s, 8 * size);
2519 }
2520 
2521 static void gen_enter(DisasContext *s, int esp_addend, int level)
2522 {
2523     MemOp d_ot = mo_pushpop(s, s->dflag);
2524     MemOp a_ot = CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
2525     int size = 1 << d_ot;
2526 
2527     /* Push BP; compute FrameTemp into T1.  */
2528     tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
2529     gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
2530     gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
2531 
2532     level &= 31;
2533     if (level != 0) {
2534         int i;
2535 
2536         /* Copy level-1 pointers from the previous frame.  */
2537         for (i = 1; i < level; ++i) {
2538             tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
2539             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2540             gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
2541 
2542             tcg_gen_subi_tl(s->A0, s->T1, size * i);
2543             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2544             gen_op_st_v(s, d_ot, s->tmp0, s->A0);
2545         }
2546 
2547         /* Push the current FrameTemp as the last level.  */
2548         tcg_gen_subi_tl(s->A0, s->T1, size * level);
2549         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2550         gen_op_st_v(s, d_ot, s->T1, s->A0);
2551     }
2552 
2553     /* Copy the FrameTemp value to EBP.  */
2554     gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
2555 
2556     /* Compute the final value of ESP.  */
2557     tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
2558     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2559 }
2560 
2561 static void gen_leave(DisasContext *s)
2562 {
2563     MemOp d_ot = mo_pushpop(s, s->dflag);
2564     MemOp a_ot = mo_stacksize(s);
2565 
2566     gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2567     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2568 
2569     tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
2570 
2571     gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
2572     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2573 }
2574 
2575 /* Similarly, except that the assumption here is that we don't decode
2576    the instruction at all -- either a missing opcode, an unimplemented
2577    feature, or just a bogus instruction stream.  */
2578 static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2579 {
2580     gen_illegal_opcode(s);
2581 
2582     if (qemu_loglevel_mask(LOG_UNIMP)) {
2583         FILE *logfile = qemu_log_lock();
2584         target_ulong pc = s->pc_start, end = s->pc;
2585 
2586         qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2587         for (; pc < end; ++pc) {
2588             qemu_log(" %02x", cpu_ldub_code(env, pc));
2589         }
2590         qemu_log("\n");
2591         qemu_log_unlock(logfile);
2592     }
2593 }
2594 
2595 /* an interrupt is different from an exception because of the
2596    privilege checks */
2597 static void gen_interrupt(DisasContext *s, int intno,
2598                           target_ulong cur_eip, target_ulong next_eip)
2599 {
2600     gen_update_cc_op(s);
2601     gen_jmp_im(s, cur_eip);
2602     gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2603                                tcg_const_i32(next_eip - cur_eip));
2604     s->base.is_jmp = DISAS_NORETURN;
2605 }
2606 
2607 static void gen_debug(DisasContext *s)
2608 {
2609     gen_update_cc_op(s);
2610     gen_jmp_im(s, s->base.pc_next - s->cs_base);
2611     gen_helper_debug(cpu_env);
2612     s->base.is_jmp = DISAS_NORETURN;
2613 }
2614 
2615 static void gen_set_hflag(DisasContext *s, uint32_t mask)
2616 {
2617     if ((s->flags & mask) == 0) {
2618         TCGv_i32 t = tcg_temp_new_i32();
2619         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2620         tcg_gen_ori_i32(t, t, mask);
2621         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2622         tcg_temp_free_i32(t);
2623         s->flags |= mask;
2624     }
2625 }
2626 
2627 static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2628 {
2629     if (s->flags & mask) {
2630         TCGv_i32 t = tcg_temp_new_i32();
2631         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2632         tcg_gen_andi_i32(t, t, ~mask);
2633         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2634         tcg_temp_free_i32(t);
2635         s->flags &= ~mask;
2636     }
2637 }
2638 
2639 /* Clear BND registers during legacy branches.  */
2640 static void gen_bnd_jmp(DisasContext *s)
2641 {
2642     /* Clear the registers only if BND prefix is missing, MPX is enabled,
2643        and if the BNDREGs are known to be in use (non-zero) already.
2644        The helper itself will check BNDPRESERVE at runtime.  */
2645     if ((s->prefix & PREFIX_REPNZ) == 0
2646         && (s->flags & HF_MPX_EN_MASK) != 0
2647         && (s->flags & HF_MPX_IU_MASK) != 0) {
2648         gen_helper_bnd_jmp(cpu_env);
2649     }
2650 }
2651 
2652 /* Generate an end of block. Trace exception is also generated if needed.
2653    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2654    If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2655    S->TF.  This is used by the syscall/sysret insns.  */
2656 static void
2657 do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2658 {
2659     gen_update_cc_op(s);
2660 
2661     /* If several instructions disable interrupts, only the first does it.  */
2662     if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2663         gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2664     } else {
2665         gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2666     }
2667 
2668     if (s->base.tb->flags & HF_RF_MASK) {
2669         gen_helper_reset_rf(cpu_env);
2670     }
2671     if (s->base.singlestep_enabled) {
2672         gen_helper_debug(cpu_env);
2673     } else if (recheck_tf) {
2674         gen_helper_rechecking_single_step(cpu_env);
2675         tcg_gen_exit_tb(NULL, 0);
2676     } else if (s->flags & HF_TF_MASK) {
2677         gen_helper_single_step(cpu_env);
2678     } else if (jr) {
2679         tcg_gen_lookup_and_goto_ptr();
2680     } else {
2681         tcg_gen_exit_tb(NULL, 0);
2682     }
2683     s->base.is_jmp = DISAS_NORETURN;
2684 }
2685 
2686 static inline void
2687 gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2688 {
2689     do_gen_eob_worker(s, inhibit, recheck_tf, false);
2690 }
2691 
2692 /* End of block.
2693    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2694 static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2695 {
2696     gen_eob_worker(s, inhibit, false);
2697 }
2698 
2699 /* End of block, resetting the inhibit irq flag.  */
2700 static void gen_eob(DisasContext *s)
2701 {
2702     gen_eob_worker(s, false, false);
2703 }
2704 
2705 /* Jump to register */
2706 static void gen_jr(DisasContext *s, TCGv dest)
2707 {
2708     do_gen_eob_worker(s, false, false, true);
2709 }
2710 
2711 /* generate a jump to eip. No segment change must happen before as a
2712    direct call to the next block may occur */
2713 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2714 {
2715     gen_update_cc_op(s);
2716     set_cc_op(s, CC_OP_DYNAMIC);
2717     if (s->jmp_opt) {
2718         gen_goto_tb(s, tb_num, eip);
2719     } else {
2720         gen_jmp_im(s, eip);
2721         gen_eob(s);
2722     }
2723 }
2724 
2725 static void gen_jmp(DisasContext *s, target_ulong eip)
2726 {
2727     gen_jmp_tb(s, eip, 0);
2728 }
2729 
2730 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2731 {
2732     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2733     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
2734 }
2735 
2736 static inline void gen_stq_env_A0(DisasContext *s, int offset)
2737 {
2738     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
2739     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2740 }
2741 
2742 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2743 {
2744     int mem_index = s->mem_index;
2745     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2746     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2747     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2748     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2749     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2750 }
2751 
2752 static inline void gen_sto_env_A0(DisasContext *s, int offset)
2753 {
2754     int mem_index = s->mem_index;
2755     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2756     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2757     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2758     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2759     tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2760 }
2761 
2762 static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
2763 {
2764     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2765     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2766     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2767     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2768 }
2769 
2770 static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
2771 {
2772     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
2773     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2774 }
2775 
2776 static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
2777 {
2778     tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
2779     tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
2780 }
2781 
2782 static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
2783 {
2784     tcg_gen_movi_i64(s->tmp1_i64, 0);
2785     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2786 }
2787 
/*
 * Function-pointer types for the SSE/MMX helper generators.  Each
 * typedef's parameter list spells out the arguments of the helper it
 * can point to (an env pointer, one or two register pointers, and/or a
 * 32-bit, 64-bit or target-sized value).
 */
typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                               TCGv_i32 val);
typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                               TCGv val);

/*
 * Sentinel values stored in the SSE opcode tables in place of a real
 * helper pointer.  NOTE(review): how the decoder treats each sentinel is
 * not visible in this part of the file.
 */
#define SSE_SPECIAL ((void *)1)
#define SSE_DUMMY ((void *)2)

/* Table-row initializer: the _mmx and _xmm variants of helper x.  */
#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
/* Table-row initializer: the ps, pd, ss and sd variants of helper x.  */
#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2805 
/*
 * Primary dispatch table used by gen_sse().
 *
 * First index: the opcode byte (b & 0xff).
 * Second index: b1, derived from the instruction prefix:
 *     0 = none, 1 = PREFIX_DATA, 2 = PREFIX_REPZ, 3 = PREFIX_REPNZ.
 *
 * A NULL entry makes gen_sse() take its unknown_op path.  SSE_SPECIAL
 * entries are decoded by gen_sse()'s switch on (b | (b1 << 8)).  For
 * opcodes outside 0x10..0x5f, 0xc2 and 0xc6, gen_sse() treats column 0
 * as the MMX form and the other columns as XMM forms.
 */
2806 static const SSEFunc_0_epp sse_op_table1[256][4] = {
2807     /* 3DNow! extensions */
2808     [0x0e] = { SSE_DUMMY }, /* femms */
2809     [0x0f] = { SSE_DUMMY }, /* pf... */
2810     /* pure SSE operations */
2811     [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2812     [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2813     [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2814     [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2815     [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2816     [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2817     [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2818     [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2819 
2820     [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2821     [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2822     [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2823     [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2824     [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
2825     [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
2826     [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2827     [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2828     [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2829     [0x51] = SSE_FOP(sqrt),
2830     [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2831     [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2832     [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2833     [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2834     [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2835     [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2836     [0x58] = SSE_FOP(add),
2837     [0x59] = SSE_FOP(mul),
2838     [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2839                gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2840     [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2841     [0x5c] = SSE_FOP(sub),
2842     [0x5d] = SSE_FOP(min),
2843     [0x5e] = SSE_FOP(div),
2844     [0x5f] = SSE_FOP(max),
2845 
2846     [0xc2] = SSE_FOP(cmpeq),
2847     [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2848                (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2849 
2850     /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2851     [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2852     [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2853 
2854     /* MMX ops and their SSE extensions */
2855     [0x60] = MMX_OP2(punpcklbw),
2856     [0x61] = MMX_OP2(punpcklwd),
2857     [0x62] = MMX_OP2(punpckldq),
2858     [0x63] = MMX_OP2(packsswb),
2859     [0x64] = MMX_OP2(pcmpgtb),
2860     [0x65] = MMX_OP2(pcmpgtw),
2861     [0x66] = MMX_OP2(pcmpgtl),
2862     [0x67] = MMX_OP2(packuswb),
2863     [0x68] = MMX_OP2(punpckhbw),
2864     [0x69] = MMX_OP2(punpckhwd),
2865     [0x6a] = MMX_OP2(punpckhdq),
2866     [0x6b] = MMX_OP2(packssdw),
2867     [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2868     [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2869     [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea; movd xmm, ea */
2870     [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2871     [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2872                (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2873                (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2874                (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2875     [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2876     [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2877     [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2878     [0x74] = MMX_OP2(pcmpeqb),
2879     [0x75] = MMX_OP2(pcmpeqw),
2880     [0x76] = MMX_OP2(pcmpeql),
2881     [0x77] = { SSE_DUMMY }, /* emms */
2882     [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2883     [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2884     [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2885     [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2886     [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd ea, mm; movd ea, xmm; movq xmm, ea */
2887     [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2888     [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2889     [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2890     [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2891     [0xd1] = MMX_OP2(psrlw),
2892     [0xd2] = MMX_OP2(psrld),
2893     [0xd3] = MMX_OP2(psrlq),
2894     [0xd4] = MMX_OP2(paddq),
2895     [0xd5] = MMX_OP2(pmullw),
2896     [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2897     [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2898     [0xd8] = MMX_OP2(psubusb),
2899     [0xd9] = MMX_OP2(psubusw),
2900     [0xda] = MMX_OP2(pminub),
2901     [0xdb] = MMX_OP2(pand),
2902     [0xdc] = MMX_OP2(paddusb),
2903     [0xdd] = MMX_OP2(paddusw),
2904     [0xde] = MMX_OP2(pmaxub),
2905     [0xdf] = MMX_OP2(pandn),
2906     [0xe0] = MMX_OP2(pavgb),
2907     [0xe1] = MMX_OP2(psraw),
2908     [0xe2] = MMX_OP2(psrad),
2909     [0xe3] = MMX_OP2(pavgw),
2910     [0xe4] = MMX_OP2(pmulhuw),
2911     [0xe5] = MMX_OP2(pmulhw),
2912     [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2913     [0xe7] = { SSE_SPECIAL , SSE_SPECIAL },  /* movntq, movntdq */
2914     [0xe8] = MMX_OP2(psubsb),
2915     [0xe9] = MMX_OP2(psubsw),
2916     [0xea] = MMX_OP2(pminsw),
2917     [0xeb] = MMX_OP2(por),
2918     [0xec] = MMX_OP2(paddsb),
2919     [0xed] = MMX_OP2(paddsw),
2920     [0xee] = MMX_OP2(pmaxsw),
2921     [0xef] = MMX_OP2(pxor),
2922     [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2923     [0xf1] = MMX_OP2(psllw),
2924     [0xf2] = MMX_OP2(pslld),
2925     [0xf3] = MMX_OP2(psllq),
2926     [0xf4] = MMX_OP2(pmuludq),
2927     [0xf5] = MMX_OP2(pmaddwd),
2928     [0xf6] = MMX_OP2(psadbw),
2929     [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2930                (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2931     [0xf8] = MMX_OP2(psubb),
2932     [0xf9] = MMX_OP2(psubw),
2933     [0xfa] = MMX_OP2(psubl),
2934     [0xfb] = MMX_OP2(psubq),
2935     [0xfc] = MMX_OP2(paddb),
2936     [0xfd] = MMX_OP2(paddw),
2937     [0xfe] = MMX_OP2(paddl),
2938 };
2939 
/*
 * Shift-by-immediate helpers for opcodes 0x71, 0x72 and 0x73.
 *
 * gen_sse() computes the first index as
 *     ((b - 1) & 3) * 8 + ((modrm >> 3) & 7)
 * so rows 0-7 belong to 0x71, rows 8-15 to 0x72 and rows 16-23 to 0x73,
 * with the ModRM reg field selecting the row within each group.  The
 * second index is b1 (0 = MMX helper, 1 = XMM helper); gen_sse() rejects
 * b1 >= 2 before the lookup.  A NULL entry leads to unknown_op.
 */
2940 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2941     [0 + 2] = MMX_OP2(psrlw),
2942     [0 + 4] = MMX_OP2(psraw),
2943     [0 + 6] = MMX_OP2(psllw),
2944     [8 + 2] = MMX_OP2(psrld),
2945     [8 + 4] = MMX_OP2(psrad),
2946     [8 + 6] = MMX_OP2(pslld),
2947     [16 + 2] = MMX_OP2(psrlq),
2948     [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2949     [16 + 6] = MMX_OP2(psllq),
2950     [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2951 };
2952 
/*
 * cvtsi2ss / cvtsi2sd helpers taking a TCGv_i32 value operand
 * (SSEFunc_0_epi).
 * NOTE(review): the lookup is outside this view; presumably index 0 is the
 * scalar-single form and index 1 the scalar-double form -- confirm in
 * gen_sse() before reordering.
 */
2953 static const SSEFunc_0_epi sse_op_table3ai[] = {
2954     gen_helper_cvtsi2ss,
2955     gen_helper_cvtsi2sd
2956 };
2957 
/*
 * cvtsq2ss / cvtsq2sd helpers taking a TCGv_i64 value operand
 * (SSEFunc_0_epl); only built when TARGET_X86_64 is defined.
 * NOTE(review): entry order mirrors sse_op_table3ai; the lookup itself is
 * outside this view -- confirm in gen_sse().
 */
2958 #ifdef TARGET_X86_64
2959 static const SSEFunc_0_epl sse_op_table3aq[] = {
2960     gen_helper_cvtsq2ss,
2961     gen_helper_cvtsq2sd
2962 };
2963 #endif
2964 
/*
 * cvt(t)ss2si / cvt(t)sd2si helpers with a leading TCGv_i32 value operand
 * (SSEFunc_i_ep).
 * NOTE(review): the index computation lives in gen_sse(), outside this
 * view; the entry order here is presumably significant -- confirm before
 * reordering.
 */
2965 static const SSEFunc_i_ep sse_op_table3bi[] = {
2966     gen_helper_cvttss2si,
2967     gen_helper_cvtss2si,
2968     gen_helper_cvttsd2si,
2969     gen_helper_cvtsd2si
2970 };
2971 
/*
 * cvt(t)ss2sq / cvt(t)sd2sq helpers with a leading TCGv_i64 value operand
 * (SSEFunc_l_ep); only built when TARGET_X86_64 is defined.
 * NOTE(review): entry order mirrors sse_op_table3bi; the lookup itself is
 * outside this view -- confirm in gen_sse().
 */
2972 #ifdef TARGET_X86_64
2973 static const SSEFunc_l_ep sse_op_table3bq[] = {
2974     gen_helper_cvttss2sq,
2975     gen_helper_cvtss2sq,
2976     gen_helper_cvttsd2sq,
2977     gen_helper_cvtsd2sq
2978 };
2979 #endif
2980 
/*
 * Comparison helpers: eight rows (eq, lt, le, unord, neq, nlt, nle, ord),
 * each expanded by SSE_FOP into its ps, pd, ss and sd variants.
 * NOTE(review): presumably the row is selected by the comparison-predicate
 * immediate of opcode 0xc2 and the column by b1; the lookup is outside
 * this view -- confirm in gen_sse().
 */
2981 static const SSEFunc_0_epp sse_op_table4[8][4] = {
2982     SSE_FOP(cmpeq),
2983     SSE_FOP(cmplt),
2984     SSE_FOP(cmple),
2985     SSE_FOP(cmpunord),
2986     SSE_FOP(cmpneq),
2987     SSE_FOP(cmpnlt),
2988     SSE_FOP(cmpnle),
2989     SSE_FOP(cmpord),
2990 };
2991 
/*
 * Sparse 256-entry table of helpers for the pf... / pi2f... style
 * operations; unlisted slots are NULL.
 * NOTE(review): presumably indexed by the 3DNow! opcode suffix byte for
 * the sse_op_table1[0x0f] row; the lookup is outside this view -- confirm
 * in gen_sse().
 */
2992 static const SSEFunc_0_epp sse_op_table5[256] = {
2993     [0x0c] = gen_helper_pi2fw,
2994     [0x0d] = gen_helper_pi2fd,
2995     [0x1c] = gen_helper_pf2iw,
2996     [0x1d] = gen_helper_pf2id,
2997     [0x8a] = gen_helper_pfnacc,
2998     [0x8e] = gen_helper_pfpnacc,
2999     [0x90] = gen_helper_pfcmpge,
3000     [0x94] = gen_helper_pfmin,
3001     [0x96] = gen_helper_pfrcp,
3002     [0x97] = gen_helper_pfrsqrt,
3003     [0x9a] = gen_helper_pfsub,
3004     [0x9e] = gen_helper_pfadd,
3005     [0xa0] = gen_helper_pfcmpgt,
3006     [0xa4] = gen_helper_pfmax,
3007     [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
3008     [0xa7] = gen_helper_movq, /* pfrsqit1 */
3009     [0xaa] = gen_helper_pfsubr,
3010     [0xae] = gen_helper_pfacc,
3011     [0xb0] = gen_helper_pfcmpeq,
3012     [0xb4] = gen_helper_pfmul,
3013     [0xb6] = gen_helper_movq, /* pfrcpit2 */
3014     [0xb7] = gen_helper_pmulhrw_mmx,
3015     [0xbb] = gen_helper_pswapd,
3016     [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
3017 };
3018 
/*
 * Table entry pairing two SSEFunc_0_epp helpers with a feature mask.
 * The initializer macros in this file fill op[] as { MMX-or-NULL, XMM }
 * and ext_mask with a CPUID_EXT_* constant.
 * NOTE(review): how ext_mask is tested is outside this view -- confirm in
 * gen_sse().
 */
3019 struct SSEOpHelper_epp {
3020     SSEFunc_0_epp op[2];
3021     uint32_t ext_mask;
3022 };
3023 
/*
 * Same layout as struct SSEOpHelper_epp, but the helpers are
 * SSEFunc_0_eppi, i.e. they take an additional TCGv_i32 value argument.
 */
3024 struct SSEOpHelper_eppi {
3025     SSEFunc_0_eppi op[2];
3026     uint32_t ext_mask;
3027 };
3028 
/*
 * Initializers for struct SSEOpHelper_epp / SSEOpHelper_eppi entries:
 * { { op[0], op[1] }, ext_mask }.
 *   SSSE3_OP      - both the _mmx and _xmm helpers, CPUID_EXT_SSSE3.
 *   SSE41_OP      - _xmm helper only (op[0] is NULL), CPUID_EXT_SSE41.
 *   SSE42_OP      - _xmm helper only, CPUID_EXT_SSE42.
 *   SSE41_SPECIAL - SSE_SPECIAL sentinel in the XMM slot, CPUID_EXT_SSE41.
 *   PCLMULQDQ_OP  - _xmm helper only, CPUID_EXT_PCLMULQDQ.
 *   AESNI_OP      - _xmm helper only, CPUID_EXT_AES.
 */
3029 #define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
3030 #define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
3031 #define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
3032 #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
3033 #define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
3034         CPUID_EXT_PCLMULQDQ }
3035 #define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
3036 
/*
 * Sparse 256-entry table of two-operand SSSE3 / SSE4.1 / SSE4.2 / AES
 * helpers, each tagged with the CPUID_EXT_* bit supplied by its
 * initializer macro.  Unlisted slots are zero-initialized (NULL helpers).
 * NOTE(review): presumably indexed by the opcode byte that follows the
 * 0x38 escape (see sse_op_table1[0x38]); the lookup is outside this view
 * -- confirm in gen_sse().
 */
3037 static const struct SSEOpHelper_epp sse_op_table6[256] = {
3038     [0x00] = SSSE3_OP(pshufb),
3039     [0x01] = SSSE3_OP(phaddw),
3040     [0x02] = SSSE3_OP(phaddd),
3041     [0x03] = SSSE3_OP(phaddsw),
3042     [0x04] = SSSE3_OP(pmaddubsw),
3043     [0x05] = SSSE3_OP(phsubw),
3044     [0x06] = SSSE3_OP(phsubd),
3045     [0x07] = SSSE3_OP(phsubsw),
3046     [0x08] = SSSE3_OP(psignb),
3047     [0x09] = SSSE3_OP(psignw),
3048     [0x0a] = SSSE3_OP(psignd),
3049     [0x0b] = SSSE3_OP(pmulhrsw),
3050     [0x10] = SSE41_OP(pblendvb),
3051     [0x14] = SSE41_OP(blendvps),
3052     [0x15] = SSE41_OP(blendvpd),
3053     [0x17] = SSE41_OP(ptest),
3054     [0x1c] = SSSE3_OP(pabsb),
3055     [0x1d] = SSSE3_OP(pabsw),
3056     [0x1e] = SSSE3_OP(pabsd),
3057     [0x20] = SSE41_OP(pmovsxbw),
3058     [0x21] = SSE41_OP(pmovsxbd),
3059     [0x22] = SSE41_OP(pmovsxbq),
3060     [0x23] = SSE41_OP(pmovsxwd),
3061     [0x24] = SSE41_OP(pmovsxwq),
3062     [0x25] = SSE41_OP(pmovsxdq),
3063     [0x28] = SSE41_OP(pmuldq),
3064     [0x29] = SSE41_OP(pcmpeqq),
3065     [0x2a] = SSE41_SPECIAL, /* movntdqa */
3066     [0x2b] = SSE41_OP(packusdw),
3067     [0x30] = SSE41_OP(pmovzxbw),
3068     [0x31] = SSE41_OP(pmovzxbd),
3069     [0x32] = SSE41_OP(pmovzxbq),
3070     [0x33] = SSE41_OP(pmovzxwd),
3071     [0x34] = SSE41_OP(pmovzxwq),
3072     [0x35] = SSE41_OP(pmovzxdq),
3073     [0x37] = SSE42_OP(pcmpgtq),
3074     [0x38] = SSE41_OP(pminsb),
3075     [0x39] = SSE41_OP(pminsd),
3076     [0x3a] = SSE41_OP(pminuw),
3077     [0x3b] = SSE41_OP(pminud),
3078     [0x3c] = SSE41_OP(pmaxsb),
3079     [0x3d] = SSE41_OP(pmaxsd),
3080     [0x3e] = SSE41_OP(pmaxuw),
3081     [0x3f] = SSE41_OP(pmaxud),
3082     [0x40] = SSE41_OP(pmulld),
3083     [0x41] = SSE41_OP(phminposuw),
3084     [0xdb] = AESNI_OP(aesimc),
3085     [0xdc] = AESNI_OP(aesenc),
3086     [0xdd] = AESNI_OP(aesenclast),
3087     [0xde] = AESNI_OP(aesdec),
3088     [0xdf] = AESNI_OP(aesdeclast),
3089 };
3090 
/*
 * Sparse 256-entry table of helpers that take an extra TCGv_i32 value
 * argument (SSEFunc_0_eppi), each tagged with the CPUID_EXT_* bit supplied
 * by its initializer macro.  SSE41_SPECIAL rows carry the SSE_SPECIAL
 * sentinel instead of a callable helper.
 * NOTE(review): presumably indexed by the opcode byte that follows the
 * 0x3a escape (see sse_op_table1[0x3a]); the lookup is outside this view
 * -- confirm in gen_sse().
 */
3091 static const struct SSEOpHelper_eppi sse_op_table7[256] = {
3092     [0x08] = SSE41_OP(roundps),
3093     [0x09] = SSE41_OP(roundpd),
3094     [0x0a] = SSE41_OP(roundss),
3095     [0x0b] = SSE41_OP(roundsd),
3096     [0x0c] = SSE41_OP(blendps),
3097     [0x0d] = SSE41_OP(blendpd),
3098     [0x0e] = SSE41_OP(pblendw),
3099     [0x0f] = SSSE3_OP(palignr),
3100     [0x14] = SSE41_SPECIAL, /* pextrb */
3101     [0x15] = SSE41_SPECIAL, /* pextrw */
3102     [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
3103     [0x17] = SSE41_SPECIAL, /* extractps */
3104     [0x20] = SSE41_SPECIAL, /* pinsrb */
3105     [0x21] = SSE41_SPECIAL, /* insertps */
3106     [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
3107     [0x40] = SSE41_OP(dpps),
3108     [0x41] = SSE41_OP(dppd),
3109     [0x42] = SSE41_OP(mpsadbw),
3110     [0x44] = PCLMULQDQ_OP(pclmulqdq),
3111     [0x60] = SSE42_OP(pcmpestrm),
3112     [0x61] = SSE42_OP(pcmpestri),
3113     [0x62] = SSE42_OP(pcmpistrm),
3114     [0x63] = SSE42_OP(pcmpistri),
3115     [0xdf] = AESNI_OP(aeskeygenassist),
3116 };
3117 
3118 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3119                     target_ulong pc_start)
3120 {
3121     int b1, op1_offset, op2_offset, is_xmm, val;
3122     int modrm, mod, rm, reg;
3123     SSEFunc_0_epp sse_fn_epp;
3124     SSEFunc_0_eppi sse_fn_eppi;
3125     SSEFunc_0_ppi sse_fn_ppi;
3126     SSEFunc_0_eppt sse_fn_eppt;
3127     MemOp ot;
3128 
3129     b &= 0xff;
3130     if (s->prefix & PREFIX_DATA)
3131         b1 = 1;
3132     else if (s->prefix & PREFIX_REPZ)
3133         b1 = 2;
3134     else if (s->prefix & PREFIX_REPNZ)
3135         b1 = 3;
3136     else
3137         b1 = 0;
3138     sse_fn_epp = sse_op_table1[b][b1];
3139     if (!sse_fn_epp) {
3140         goto unknown_op;
3141     }
3142     if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3143         is_xmm = 1;
3144     } else {
3145         if (b1 == 0) {
3146             /* MMX case */
3147             is_xmm = 0;
3148         } else {
3149             is_xmm = 1;
3150         }
3151     }
3152     /* simple MMX/SSE operation */
3153     if (s->flags & HF_TS_MASK) {
3154         gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3155         return;
3156     }
3157     if (s->flags & HF_EM_MASK) {
3158     illegal_op:
3159         gen_illegal_opcode(s);
3160         return;
3161     }
3162     if (is_xmm
3163         && !(s->flags & HF_OSFXSR_MASK)
3164         && (b != 0x38 && b != 0x3a)) {
3165         goto unknown_op;
3166     }
3167     if (b == 0x0e) {
3168         if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3169             /* If we were fully decoding this we might use illegal_op.  */
3170             goto unknown_op;
3171         }
3172         /* femms */
3173         gen_helper_emms(cpu_env);
3174         return;
3175     }
3176     if (b == 0x77) {
3177         /* emms */
3178         gen_helper_emms(cpu_env);
3179         return;
3180     }
3181     /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3182        the static cpu state) */
3183     if (!is_xmm) {
3184         gen_helper_enter_mmx(cpu_env);
3185     }
3186 
3187     modrm = x86_ldub_code(env, s);
3188     reg = ((modrm >> 3) & 7);
3189     if (is_xmm) {
3190         reg |= REX_R(s);
3191     }
3192     mod = (modrm >> 6) & 3;
3193     if (sse_fn_epp == SSE_SPECIAL) {
3194         b |= (b1 << 8);
3195         switch(b) {
3196         case 0x0e7: /* movntq */
3197             if (mod == 3) {
3198                 goto illegal_op;
3199             }
3200             gen_lea_modrm(env, s, modrm);
3201             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3202             break;
3203         case 0x1e7: /* movntdq */
3204         case 0x02b: /* movntps */
3205         case 0x12b: /* movntps */
3206             if (mod == 3)
3207                 goto illegal_op;
3208             gen_lea_modrm(env, s, modrm);
3209             gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3210             break;
3211         case 0x3f0: /* lddqu */
3212             if (mod == 3)
3213                 goto illegal_op;
3214             gen_lea_modrm(env, s, modrm);
3215             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3216             break;
3217         case 0x22b: /* movntss */
3218         case 0x32b: /* movntsd */
3219             if (mod == 3)
3220                 goto illegal_op;
3221             gen_lea_modrm(env, s, modrm);
3222             if (b1 & 1) {
3223                 gen_stq_env_A0(s, offsetof(CPUX86State,
3224                                            xmm_regs[reg].ZMM_Q(0)));
3225             } else {
3226                 tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3227                     xmm_regs[reg].ZMM_L(0)));
3228                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3229             }
3230             break;
3231         case 0x6e: /* movd mm, ea */
3232 #ifdef TARGET_X86_64
3233             if (s->dflag == MO_64) {
3234                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3235                 tcg_gen_st_tl(s->T0, cpu_env,
3236                               offsetof(CPUX86State, fpregs[reg].mmx));
3237             } else
3238 #endif
3239             {
3240                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3241                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3242                                  offsetof(CPUX86State,fpregs[reg].mmx));
3243                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3244                 gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3245             }
3246             break;
3247         case 0x16e: /* movd xmm, ea */
3248 #ifdef TARGET_X86_64
3249             if (s->dflag == MO_64) {
3250                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3251                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3252                                  offsetof(CPUX86State,xmm_regs[reg]));
3253                 gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3254             } else
3255 #endif
3256             {
3257                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3258                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3259                                  offsetof(CPUX86State,xmm_regs[reg]));
3260                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3261                 gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3262             }
3263             break;
3264         case 0x6f: /* movq mm, ea */
3265             if (mod != 3) {
3266                 gen_lea_modrm(env, s, modrm);
3267                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3268             } else {
3269                 rm = (modrm & 7);
3270                 tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3271                                offsetof(CPUX86State,fpregs[rm].mmx));
3272                 tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3273                                offsetof(CPUX86State,fpregs[reg].mmx));
3274             }
3275             break;
3276         case 0x010: /* movups */
3277         case 0x110: /* movupd */
3278         case 0x028: /* movaps */
3279         case 0x128: /* movapd */
3280         case 0x16f: /* movdqa xmm, ea */
3281         case 0x26f: /* movdqu xmm, ea */
3282             if (mod != 3) {
3283                 gen_lea_modrm(env, s, modrm);
3284                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3285             } else {
3286                 rm = (modrm & 7) | REX_B(s);
3287                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
3288                             offsetof(CPUX86State,xmm_regs[rm]));
3289             }
3290             break;
3291         case 0x210: /* movss xmm, ea */
3292             if (mod != 3) {
3293                 gen_lea_modrm(env, s, modrm);
3294                 gen_op_ld_v(s, MO_32, s->T0, s->A0);
3295                 tcg_gen_st32_tl(s->T0, cpu_env,
3296                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3297                 tcg_gen_movi_tl(s->T0, 0);
3298                 tcg_gen_st32_tl(s->T0, cpu_env,
3299                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3300                 tcg_gen_st32_tl(s->T0, cpu_env,
3301                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3302                 tcg_gen_st32_tl(s->T0, cpu_env,
3303                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3304             } else {
3305                 rm = (modrm & 7) | REX_B(s);
3306                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3307                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3308             }
3309             break;
3310         case 0x310: /* movsd xmm, ea */
3311             if (mod != 3) {
3312                 gen_lea_modrm(env, s, modrm);
3313                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3314                                            xmm_regs[reg].ZMM_Q(0)));
3315                 tcg_gen_movi_tl(s->T0, 0);
3316                 tcg_gen_st32_tl(s->T0, cpu_env,
3317                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3318                 tcg_gen_st32_tl(s->T0, cpu_env,
3319                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3320             } else {
3321                 rm = (modrm & 7) | REX_B(s);
3322                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3323                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3324             }
3325             break;
3326         case 0x012: /* movlps */
3327         case 0x112: /* movlpd */
3328             if (mod != 3) {
3329                 gen_lea_modrm(env, s, modrm);
3330                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3331                                            xmm_regs[reg].ZMM_Q(0)));
3332             } else {
3333                 /* movhlps */
3334                 rm = (modrm & 7) | REX_B(s);
3335                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3336                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3337             }
3338             break;
3339         case 0x212: /* movsldup */
3340             if (mod != 3) {
3341                 gen_lea_modrm(env, s, modrm);
3342                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3343             } else {
3344                 rm = (modrm & 7) | REX_B(s);
3345                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3346                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3347                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3348                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3349             }
3350             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3351                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3352             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3353                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3354             break;
3355         case 0x312: /* movddup */
3356             if (mod != 3) {
3357                 gen_lea_modrm(env, s, modrm);
3358                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3359                                            xmm_regs[reg].ZMM_Q(0)));
3360             } else {
3361                 rm = (modrm & 7) | REX_B(s);
3362                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3363                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3364             }
3365             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3366                         offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3367             break;
3368         case 0x016: /* movhps */
3369         case 0x116: /* movhpd */
3370             if (mod != 3) {
3371                 gen_lea_modrm(env, s, modrm);
3372                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3373                                            xmm_regs[reg].ZMM_Q(1)));
3374             } else {
3375                 /* movlhps */
3376                 rm = (modrm & 7) | REX_B(s);
3377                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3378                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3379             }
3380             break;
3381         case 0x216: /* movshdup */
3382             if (mod != 3) {
3383                 gen_lea_modrm(env, s, modrm);
3384                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3385             } else {
3386                 rm = (modrm & 7) | REX_B(s);
3387                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3388                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3389                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3390                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3391             }
3392             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3393                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3394             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3395                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3396             break;
3397         case 0x178:
3398         case 0x378:
3399             {
3400                 int bit_index, field_length;
3401 
3402                 if (b1 == 1 && reg != 0)
3403                     goto illegal_op;
3404                 field_length = x86_ldub_code(env, s) & 0x3F;
3405                 bit_index = x86_ldub_code(env, s) & 0x3F;
3406                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3407                     offsetof(CPUX86State,xmm_regs[reg]));
3408                 if (b1 == 1)
3409                     gen_helper_extrq_i(cpu_env, s->ptr0,
3410                                        tcg_const_i32(bit_index),
3411                                        tcg_const_i32(field_length));
3412                 else
3413                     gen_helper_insertq_i(cpu_env, s->ptr0,
3414                                          tcg_const_i32(bit_index),
3415                                          tcg_const_i32(field_length));
3416             }
3417             break;
3418         case 0x7e: /* movd ea, mm */
3419 #ifdef TARGET_X86_64
3420             if (s->dflag == MO_64) {
3421                 tcg_gen_ld_i64(s->T0, cpu_env,
3422                                offsetof(CPUX86State,fpregs[reg].mmx));
3423                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3424             } else
3425 #endif
3426             {
3427                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3428                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3429                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3430             }
3431             break;
3432         case 0x17e: /* movd ea, xmm */
3433 #ifdef TARGET_X86_64
3434             if (s->dflag == MO_64) {
3435                 tcg_gen_ld_i64(s->T0, cpu_env,
3436                                offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3437                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3438             } else
3439 #endif
3440             {
3441                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3442                                  offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3443                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3444             }
3445             break;
3446         case 0x27e: /* movq xmm, ea */
3447             if (mod != 3) {
3448                 gen_lea_modrm(env, s, modrm);
3449                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3450                                            xmm_regs[reg].ZMM_Q(0)));
3451             } else {
3452                 rm = (modrm & 7) | REX_B(s);
3453                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3454                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3455             }
3456             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3457             break;
3458         case 0x7f: /* movq ea, mm */
3459             if (mod != 3) {
3460                 gen_lea_modrm(env, s, modrm);
3461                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3462             } else {
3463                 rm = (modrm & 7);
3464                 gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
3465                             offsetof(CPUX86State,fpregs[reg].mmx));
3466             }
3467             break;
3468         case 0x011: /* movups */
3469         case 0x111: /* movupd */
3470         case 0x029: /* movaps */
3471         case 0x129: /* movapd */
3472         case 0x17f: /* movdqa ea, xmm */
3473         case 0x27f: /* movdqu ea, xmm */
3474             if (mod != 3) {
3475                 gen_lea_modrm(env, s, modrm);
3476                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3477             } else {
3478                 rm = (modrm & 7) | REX_B(s);
3479                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
3480                             offsetof(CPUX86State,xmm_regs[reg]));
3481             }
3482             break;
3483         case 0x211: /* movss ea, xmm */
3484             if (mod != 3) {
3485                 gen_lea_modrm(env, s, modrm);
3486                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3487                                  offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3488                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3489             } else {
3490                 rm = (modrm & 7) | REX_B(s);
3491                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
3492                             offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3493             }
3494             break;
3495         case 0x311: /* movsd ea, xmm */
3496             if (mod != 3) {
3497                 gen_lea_modrm(env, s, modrm);
3498                 gen_stq_env_A0(s, offsetof(CPUX86State,
3499                                            xmm_regs[reg].ZMM_Q(0)));
3500             } else {
3501                 rm = (modrm & 7) | REX_B(s);
3502                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3503                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3504             }
3505             break;
3506         case 0x013: /* movlps */
3507         case 0x113: /* movlpd */
3508             if (mod != 3) {
3509                 gen_lea_modrm(env, s, modrm);
3510                 gen_stq_env_A0(s, offsetof(CPUX86State,
3511                                            xmm_regs[reg].ZMM_Q(0)));
3512             } else {
3513                 goto illegal_op;
3514             }
3515             break;
3516         case 0x017: /* movhps */
3517         case 0x117: /* movhpd */
3518             if (mod != 3) {
3519                 gen_lea_modrm(env, s, modrm);
3520                 gen_stq_env_A0(s, offsetof(CPUX86State,
3521                                            xmm_regs[reg].ZMM_Q(1)));
3522             } else {
3523                 goto illegal_op;
3524             }
3525             break;
3526         case 0x71: /* shift mm, im */
3527         case 0x72:
3528         case 0x73:
3529         case 0x171: /* shift xmm, im */
3530         case 0x172:
3531         case 0x173:
3532             if (b1 >= 2) {
3533                 goto unknown_op;
3534             }
3535             val = x86_ldub_code(env, s);
3536             if (is_xmm) {
3537                 tcg_gen_movi_tl(s->T0, val);
3538                 tcg_gen_st32_tl(s->T0, cpu_env,
3539                                 offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3540                 tcg_gen_movi_tl(s->T0, 0);
3541                 tcg_gen_st32_tl(s->T0, cpu_env,
3542                                 offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
3543                 op1_offset = offsetof(CPUX86State,xmm_t0);
3544             } else {
3545                 tcg_gen_movi_tl(s->T0, val);
3546                 tcg_gen_st32_tl(s->T0, cpu_env,
3547                                 offsetof(CPUX86State, mmx_t0.MMX_L(0)));
3548                 tcg_gen_movi_tl(s->T0, 0);
3549                 tcg_gen_st32_tl(s->T0, cpu_env,
3550                                 offsetof(CPUX86State, mmx_t0.MMX_L(1)));
3551                 op1_offset = offsetof(CPUX86State,mmx_t0);
3552             }
3553             sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3554                                        (((modrm >> 3)) & 7)][b1];
3555             if (!sse_fn_epp) {
3556                 goto unknown_op;
3557             }
3558             if (is_xmm) {
3559                 rm = (modrm & 7) | REX_B(s);
3560                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3561             } else {
3562                 rm = (modrm & 7);
3563                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3564             }
3565             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3566             tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
3567             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3568             break;
3569         case 0x050: /* movmskps */
3570             rm = (modrm & 7) | REX_B(s);
3571             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3572                              offsetof(CPUX86State,xmm_regs[rm]));
3573             gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
3574             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3575             break;
3576         case 0x150: /* movmskpd */
3577             rm = (modrm & 7) | REX_B(s);
3578             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3579                              offsetof(CPUX86State,xmm_regs[rm]));
3580             gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
3581             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3582             break;
3583         case 0x02a: /* cvtpi2ps */
3584         case 0x12a: /* cvtpi2pd */
3585             gen_helper_enter_mmx(cpu_env);
3586             if (mod != 3) {
3587                 gen_lea_modrm(env, s, modrm);
3588                 op2_offset = offsetof(CPUX86State,mmx_t0);
3589                 gen_ldq_env_A0(s, op2_offset);
3590             } else {
3591                 rm = (modrm & 7);
3592                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3593             }
3594             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3595             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3596             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3597             switch(b >> 8) {
3598             case 0x0:
3599                 gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
3600                 break;
3601             default:
3602             case 0x1:
3603                 gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
3604                 break;
3605             }
3606             break;
3607         case 0x22a: /* cvtsi2ss */
3608         case 0x32a: /* cvtsi2sd */
3609             ot = mo_64_32(s->dflag);
3610             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3611             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3612             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3613             if (ot == MO_32) {
3614                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3615                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3616                 sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
3617             } else {
3618 #ifdef TARGET_X86_64
3619                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3620                 sse_fn_epl(cpu_env, s->ptr0, s->T0);
3621 #else
3622                 goto illegal_op;
3623 #endif
3624             }
3625             break;
3626         case 0x02c: /* cvttps2pi */
3627         case 0x12c: /* cvttpd2pi */
3628         case 0x02d: /* cvtps2pi */
3629         case 0x12d: /* cvtpd2pi */
3630             gen_helper_enter_mmx(cpu_env);
3631             if (mod != 3) {
3632                 gen_lea_modrm(env, s, modrm);
3633                 op2_offset = offsetof(CPUX86State,xmm_t0);
3634                 gen_ldo_env_A0(s, op2_offset);
3635             } else {
3636                 rm = (modrm & 7) | REX_B(s);
3637                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3638             }
3639             op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3640             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3641             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3642             switch(b) {
3643             case 0x02c:
3644                 gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
3645                 break;
3646             case 0x12c:
3647                 gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
3648                 break;
3649             case 0x02d:
3650                 gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
3651                 break;
3652             case 0x12d:
3653                 gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
3654                 break;
3655             }
3656             break;
3657         case 0x22c: /* cvttss2si */
3658         case 0x32c: /* cvttsd2si */
3659         case 0x22d: /* cvtss2si */
3660         case 0x32d: /* cvtsd2si */
3661             ot = mo_64_32(s->dflag);
3662             if (mod != 3) {
3663                 gen_lea_modrm(env, s, modrm);
3664                 if ((b >> 8) & 1) {
3665                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3666                 } else {
3667                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
3668                     tcg_gen_st32_tl(s->T0, cpu_env,
3669                                     offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3670                 }
3671                 op2_offset = offsetof(CPUX86State,xmm_t0);
3672             } else {
3673                 rm = (modrm & 7) | REX_B(s);
3674                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3675             }
3676             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3677             if (ot == MO_32) {
3678                 SSEFunc_i_ep sse_fn_i_ep =
3679                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3680                 sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
3681                 tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
3682             } else {
3683 #ifdef TARGET_X86_64
3684                 SSEFunc_l_ep sse_fn_l_ep =
3685                     sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3686                 sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
3687 #else
3688                 goto illegal_op;
3689 #endif
3690             }
3691             gen_op_mov_reg_v(s, ot, reg, s->T0);
3692             break;
3693         case 0xc4: /* pinsrw */
3694         case 0x1c4:
3695             s->rip_offset = 1;
3696             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3697             val = x86_ldub_code(env, s);
3698             if (b1) {
3699                 val &= 7;
3700                 tcg_gen_st16_tl(s->T0, cpu_env,
3701                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3702             } else {
3703                 val &= 3;
3704                 tcg_gen_st16_tl(s->T0, cpu_env,
3705                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3706             }
3707             break;
3708         case 0xc5: /* pextrw */
3709         case 0x1c5:
3710             if (mod != 3)
3711                 goto illegal_op;
3712             ot = mo_64_32(s->dflag);
3713             val = x86_ldub_code(env, s);
3714             if (b1) {
3715                 val &= 7;
3716                 rm = (modrm & 7) | REX_B(s);
3717                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3718                                  offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3719             } else {
3720                 val &= 3;
3721                 rm = (modrm & 7);
3722                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3723                                 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3724             }
3725             reg = ((modrm >> 3) & 7) | REX_R(s);
3726             gen_op_mov_reg_v(s, ot, reg, s->T0);
3727             break;
3728         case 0x1d6: /* movq ea, xmm */
3729             if (mod != 3) {
3730                 gen_lea_modrm(env, s, modrm);
3731                 gen_stq_env_A0(s, offsetof(CPUX86State,
3732                                            xmm_regs[reg].ZMM_Q(0)));
3733             } else {
3734                 rm = (modrm & 7) | REX_B(s);
3735                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3736                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3737                 gen_op_movq_env_0(s,
3738                                   offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
3739             }
3740             break;
3741         case 0x2d6: /* movq2dq */
3742             gen_helper_enter_mmx(cpu_env);
3743             rm = (modrm & 7);
3744             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3745                         offsetof(CPUX86State,fpregs[rm].mmx));
3746             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3747             break;
3748         case 0x3d6: /* movdq2q */
3749             gen_helper_enter_mmx(cpu_env);
3750             rm = (modrm & 7) | REX_B(s);
3751             gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
3752                         offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3753             break;
3754         case 0xd7: /* pmovmskb */
3755         case 0x1d7:
3756             if (mod != 3)
3757                 goto illegal_op;
3758             if (b1) {
3759                 rm = (modrm & 7) | REX_B(s);
3760                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3761                                  offsetof(CPUX86State, xmm_regs[rm]));
3762                 gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3763             } else {
3764                 rm = (modrm & 7);
3765                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3766                                  offsetof(CPUX86State, fpregs[rm].mmx));
3767                 gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
3768             }
3769             reg = ((modrm >> 3) & 7) | REX_R(s);
3770             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3771             break;
3772 
3773         case 0x138:
3774         case 0x038:
3775             b = modrm;
3776             if ((b & 0xf0) == 0xf0) {
3777                 goto do_0f_38_fx;
3778             }
3779             modrm = x86_ldub_code(env, s);
3780             rm = modrm & 7;
3781             reg = ((modrm >> 3) & 7) | REX_R(s);
3782             mod = (modrm >> 6) & 3;
3783             if (b1 >= 2) {
3784                 goto unknown_op;
3785             }
3786 
3787             sse_fn_epp = sse_op_table6[b].op[b1];
3788             if (!sse_fn_epp) {
3789                 goto unknown_op;
3790             }
3791             if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3792                 goto illegal_op;
3793 
3794             if (b1) {
3795                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3796                 if (mod == 3) {
3797                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3798                 } else {
3799                     op2_offset = offsetof(CPUX86State,xmm_t0);
3800                     gen_lea_modrm(env, s, modrm);
3801                     switch (b) {
3802                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3803                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3804                     case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3805                         gen_ldq_env_A0(s, op2_offset +
3806                                         offsetof(ZMMReg, ZMM_Q(0)));
3807                         break;
3808                     case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3809                     case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3810                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
3811                                             s->mem_index, MO_LEUL);
3812                         tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
3813                                         offsetof(ZMMReg, ZMM_L(0)));
3814                         break;
3815                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3816                         tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
3817                                            s->mem_index, MO_LEUW);
3818                         tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
3819                                         offsetof(ZMMReg, ZMM_W(0)));
3820                         break;
3821                     case 0x2a:            /* movntqda */
3822                         gen_ldo_env_A0(s, op1_offset);
3823                         return;
3824                     default:
3825                         gen_ldo_env_A0(s, op2_offset);
3826                     }
3827                 }
3828             } else {
3829                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3830                 if (mod == 3) {
3831                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3832                 } else {
3833                     op2_offset = offsetof(CPUX86State,mmx_t0);
3834                     gen_lea_modrm(env, s, modrm);
3835                     gen_ldq_env_A0(s, op2_offset);
3836                 }
3837             }
3838             if (sse_fn_epp == SSE_SPECIAL) {
3839                 goto unknown_op;
3840             }
3841 
3842             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3843             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3844             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3845 
3846             if (b == 0x17) {
3847                 set_cc_op(s, CC_OP_EFLAGS);
3848             }
3849             break;
3850 
3851         case 0x238:
3852         case 0x338:
3853         do_0f_38_fx:
3854             /* Various integer extensions at 0f 38 f[0-f].  */
3855             b = modrm | (b1 << 8);
3856             modrm = x86_ldub_code(env, s);
3857             reg = ((modrm >> 3) & 7) | REX_R(s);
3858 
3859             switch (b) {
3860             case 0x3f0: /* crc32 Gd,Eb */
3861             case 0x3f1: /* crc32 Gd,Ey */
3862             do_crc32:
3863                 if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3864                     goto illegal_op;
3865                 }
3866                 if ((b & 0xff) == 0xf0) {
3867                     ot = MO_8;
3868                 } else if (s->dflag != MO_64) {
3869                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3870                 } else {
3871                     ot = MO_64;
3872                 }
3873 
3874                 tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
3875                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3876                 gen_helper_crc32(s->T0, s->tmp2_i32,
3877                                  s->T0, tcg_const_i32(8 << ot));
3878 
3879                 ot = mo_64_32(s->dflag);
3880                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3881                 break;
3882 
3883             case 0x1f0: /* crc32 or movbe */
3884             case 0x1f1:
3885                 /* For these insns, the f2 prefix is supposed to have priority
3886                    over the 66 prefix, but that's not what we implement above
3887                    when setting b1.  */
3888                 if (s->prefix & PREFIX_REPNZ) {
3889                     goto do_crc32;
3890                 }
3891                 /* FALLTHRU */
3892             case 0x0f0: /* movbe Gy,My */
3893             case 0x0f1: /* movbe My,Gy */
3894                 if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3895                     goto illegal_op;
3896                 }
3897                 if (s->dflag != MO_64) {
3898                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3899                 } else {
3900                     ot = MO_64;
3901                 }
3902 
3903                 gen_lea_modrm(env, s, modrm);
3904                 if ((b & 1) == 0) {
3905                     tcg_gen_qemu_ld_tl(s->T0, s->A0,
3906                                        s->mem_index, ot | MO_BE);
3907                     gen_op_mov_reg_v(s, ot, reg, s->T0);
3908                 } else {
3909                     tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
3910                                        s->mem_index, ot | MO_BE);
3911                 }
3912                 break;
3913 
3914             case 0x0f2: /* andn Gy, By, Ey */
3915                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3916                     || !(s->prefix & PREFIX_VEX)
3917                     || s->vex_l != 0) {
3918                     goto illegal_op;
3919                 }
3920                 ot = mo_64_32(s->dflag);
3921                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3922                 tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
3923                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3924                 gen_op_update1_cc(s);
3925                 set_cc_op(s, CC_OP_LOGICB + ot);
3926                 break;
3927 
3928             case 0x0f7: /* bextr Gy, Ey, By */
3929                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3930                     || !(s->prefix & PREFIX_VEX)
3931                     || s->vex_l != 0) {
3932                     goto illegal_op;
3933                 }
3934                 ot = mo_64_32(s->dflag);
3935                 {
3936                     TCGv bound, zero;
3937 
3938                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3939                     /* Extract START, and shift the operand.
3940                        Shifts larger than operand size get zeros.  */
3941                     tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
3942                     tcg_gen_shr_tl(s->T0, s->T0, s->A0);
3943 
3944                     bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3945                     zero = tcg_const_tl(0);
3946                     tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
3947                                        s->T0, zero);
3948                     tcg_temp_free(zero);
3949 
3950                     /* Extract the LEN into a mask.  Lengths larger than
3951                        operand size get all ones.  */
3952                     tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
3953                     tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
3954                                        s->A0, bound);
3955                     tcg_temp_free(bound);
3956                     tcg_gen_movi_tl(s->T1, 1);
3957                     tcg_gen_shl_tl(s->T1, s->T1, s->A0);
3958                     tcg_gen_subi_tl(s->T1, s->T1, 1);
3959                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
3960 
3961                     gen_op_mov_reg_v(s, ot, reg, s->T0);
3962                     gen_op_update1_cc(s);
3963                     set_cc_op(s, CC_OP_LOGICB + ot);
3964                 }
3965                 break;
3966 
3967             case 0x0f5: /* bzhi Gy, Ey, By */
3968                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3969                     || !(s->prefix & PREFIX_VEX)
3970                     || s->vex_l != 0) {
3971                     goto illegal_op;
3972                 }
3973                 ot = mo_64_32(s->dflag);
3974                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3975                 tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
3976                 {
3977                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3978                     /* Note that since we're using BMILG (in order to get O
3979                        cleared) we need to store the inverse into C.  */
3980                     tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3981                                        s->T1, bound);
3982                     tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
3983                                        bound, bound, s->T1);
3984                     tcg_temp_free(bound);
3985                 }
3986                 tcg_gen_movi_tl(s->A0, -1);
3987                 tcg_gen_shl_tl(s->A0, s->A0, s->T1);
3988                 tcg_gen_andc_tl(s->T0, s->T0, s->A0);
3989                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3990                 gen_op_update1_cc(s);
3991                 set_cc_op(s, CC_OP_BMILGB + ot);
3992                 break;
3993 
3994             case 0x3f6: /* mulx By, Gy, rdx, Ey */
3995                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3996                     || !(s->prefix & PREFIX_VEX)
3997                     || s->vex_l != 0) {
3998                     goto illegal_op;
3999                 }
4000                 ot = mo_64_32(s->dflag);
4001                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4002                 switch (ot) {
4003                 default:
4004                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4005                     tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
4006                     tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4007                                       s->tmp2_i32, s->tmp3_i32);
4008                     tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
4009                     tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
4010                     break;
4011 #ifdef TARGET_X86_64
4012                 case MO_64:
4013                     tcg_gen_mulu2_i64(s->T0, s->T1,
4014                                       s->T0, cpu_regs[R_EDX]);
4015                     tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
4016                     tcg_gen_mov_i64(cpu_regs[reg], s->T1);
4017                     break;
4018 #endif
4019                 }
4020                 break;
4021 
4022             case 0x3f5: /* pdep Gy, By, Ey */
4023                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4024                     || !(s->prefix & PREFIX_VEX)
4025                     || s->vex_l != 0) {
4026                     goto illegal_op;
4027                 }
4028                 ot = mo_64_32(s->dflag);
4029                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4030                 /* Note that by zero-extending the source operand, we
4031                    automatically handle zero-extending the result.  */
4032                 if (ot == MO_64) {
4033                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
4034                 } else {
4035                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
4036                 }
4037                 gen_helper_pdep(cpu_regs[reg], s->T1, s->T0);
4038                 break;
4039 
4040             case 0x2f5: /* pext Gy, By, Ey */
4041                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4042                     || !(s->prefix & PREFIX_VEX)
4043                     || s->vex_l != 0) {
4044                     goto illegal_op;
4045                 }
4046                 ot = mo_64_32(s->dflag);
4047                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4048                 /* Note that by zero-extending the source operand, we
4049                    automatically handle zero-extending the result.  */
4050                 if (ot == MO_64) {
4051                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
4052                 } else {
4053                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
4054                 }
4055                 gen_helper_pext(cpu_regs[reg], s->T1, s->T0);
4056                 break;
4057 
4058             case 0x1f6: /* adcx Gy, Ey */
4059             case 0x2f6: /* adox Gy, Ey */
4060                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
4061                     goto illegal_op;
4062                 } else {
4063                     TCGv carry_in, carry_out, zero;
4064                     int end_op;
4065 
4066                     ot = mo_64_32(s->dflag);
4067                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4068 
4069                     /* Re-use the carry-out from a previous round.  */
4070                     carry_in = NULL;
4071                     carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
4072                     switch (s->cc_op) {
4073                     case CC_OP_ADCX:
4074                         if (b == 0x1f6) {
4075                             carry_in = cpu_cc_dst;
4076                             end_op = CC_OP_ADCX;
4077                         } else {
4078                             end_op = CC_OP_ADCOX;
4079                         }
4080                         break;
4081                     case CC_OP_ADOX:
4082                         if (b == 0x1f6) {
4083                             end_op = CC_OP_ADCOX;
4084                         } else {
4085                             carry_in = cpu_cc_src2;
4086                             end_op = CC_OP_ADOX;
4087                         }
4088                         break;
4089                     case CC_OP_ADCOX:
4090                         end_op = CC_OP_ADCOX;
4091                         carry_in = carry_out;
4092                         break;
4093                     default:
4094                         end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
4095                         break;
4096                     }
4097                     /* If we can't reuse carry-out, get it out of EFLAGS.  */
4098                     if (!carry_in) {
4099                         if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
4100                             gen_compute_eflags(s);
4101                         }
4102                         carry_in = s->tmp0;
4103                         tcg_gen_extract_tl(carry_in, cpu_cc_src,
4104                                            ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
4105                     }
4106 
4107                     switch (ot) {
4108 #ifdef TARGET_X86_64
4109                     case MO_32:
4110                         /* If we know TL is 64-bit, and we want a 32-bit
4111                            result, just do everything in 64-bit arithmetic.  */
4112                         tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
4113                         tcg_gen_ext32u_i64(s->T0, s->T0);
4114                         tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
4115                         tcg_gen_add_i64(s->T0, s->T0, carry_in);
4116                         tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
4117                         tcg_gen_shri_i64(carry_out, s->T0, 32);
4118                         break;
4119 #endif
4120                     default:
4121                         /* Otherwise compute the carry-out in two steps.  */
4122                         zero = tcg_const_tl(0);
4123                         tcg_gen_add2_tl(s->T0, carry_out,
4124                                         s->T0, zero,
4125                                         carry_in, zero);
4126                         tcg_gen_add2_tl(cpu_regs[reg], carry_out,
4127                                         cpu_regs[reg], carry_out,
4128                                         s->T0, zero);
4129                         tcg_temp_free(zero);
4130                         break;
4131                     }
4132                     set_cc_op(s, end_op);
4133                 }
4134                 break;
4135 
4136             case 0x1f7: /* shlx Gy, Ey, By */
4137             case 0x2f7: /* sarx Gy, Ey, By */
4138             case 0x3f7: /* shrx Gy, Ey, By */
4139                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4140                     || !(s->prefix & PREFIX_VEX)
4141                     || s->vex_l != 0) {
4142                     goto illegal_op;
4143                 }
4144                 ot = mo_64_32(s->dflag);
4145                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4146                 if (ot == MO_64) {
4147                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
4148                 } else {
4149                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
4150                 }
4151                 if (b == 0x1f7) {
4152                     tcg_gen_shl_tl(s->T0, s->T0, s->T1);
4153                 } else if (b == 0x2f7) {
4154                     if (ot != MO_64) {
4155                         tcg_gen_ext32s_tl(s->T0, s->T0);
4156                     }
4157                     tcg_gen_sar_tl(s->T0, s->T0, s->T1);
4158                 } else {
4159                     if (ot != MO_64) {
4160                         tcg_gen_ext32u_tl(s->T0, s->T0);
4161                     }
4162                     tcg_gen_shr_tl(s->T0, s->T0, s->T1);
4163                 }
4164                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4165                 break;
4166 
4167             case 0x0f3:
4168             case 0x1f3:
4169             case 0x2f3:
4170             case 0x3f3: /* Group 17 */
4171                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4172                     || !(s->prefix & PREFIX_VEX)
4173                     || s->vex_l != 0) {
4174                     goto illegal_op;
4175                 }
4176                 ot = mo_64_32(s->dflag);
4177                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4178 
4179                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
4180                 switch (reg & 7) {
4181                 case 1: /* blsr By,Ey */
4182                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4183                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4184                     break;
4185                 case 2: /* blsmsk By,Ey */
4186                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4187                     tcg_gen_xor_tl(s->T0, s->T0, s->T1);
4188                     break;
4189                 case 3: /* blsi By, Ey */
4190                     tcg_gen_neg_tl(s->T1, s->T0);
4191                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4192                     break;
4193                 default:
4194                     goto unknown_op;
4195                 }
4196                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4197                 gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
4198                 set_cc_op(s, CC_OP_BMILGB + ot);
4199                 break;
4200 
4201             default:
4202                 goto unknown_op;
4203             }
4204             break;
4205 
4206         case 0x03a:
4207         case 0x13a:
4208             b = modrm;
4209             modrm = x86_ldub_code(env, s);
4210             rm = modrm & 7;
4211             reg = ((modrm >> 3) & 7) | REX_R(s);
4212             mod = (modrm >> 6) & 3;
4213             if (b1 >= 2) {
4214                 goto unknown_op;
4215             }
4216 
4217             sse_fn_eppi = sse_op_table7[b].op[b1];
4218             if (!sse_fn_eppi) {
4219                 goto unknown_op;
4220             }
4221             if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
4222                 goto illegal_op;
4223 
4224             s->rip_offset = 1;
4225 
4226             if (sse_fn_eppi == SSE_SPECIAL) {
4227                 ot = mo_64_32(s->dflag);
4228                 rm = (modrm & 7) | REX_B(s);
4229                 if (mod != 3)
4230                     gen_lea_modrm(env, s, modrm);
4231                 reg = ((modrm >> 3) & 7) | REX_R(s);
4232                 val = x86_ldub_code(env, s);
4233                 switch (b) {
4234                 case 0x14: /* pextrb */
4235                     tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4236                                             xmm_regs[reg].ZMM_B(val & 15)));
4237                     if (mod == 3) {
4238                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4239                     } else {
4240                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4241                                            s->mem_index, MO_UB);
4242                     }
4243                     break;
4244                 case 0x15: /* pextrw */
4245                     tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4246                                             xmm_regs[reg].ZMM_W(val & 7)));
4247                     if (mod == 3) {
4248                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4249                     } else {
4250                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4251                                            s->mem_index, MO_LEUW);
4252                     }
4253                     break;
4254                 case 0x16:
4255                     if (ot == MO_32) { /* pextrd */
4256                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4257                                         offsetof(CPUX86State,
4258                                                 xmm_regs[reg].ZMM_L(val & 3)));
4259                         if (mod == 3) {
4260                             tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
4261                         } else {
4262                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
4263                                                 s->mem_index, MO_LEUL);
4264                         }
4265                     } else { /* pextrq */
4266 #ifdef TARGET_X86_64
4267                         tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
4268                                         offsetof(CPUX86State,
4269                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4270                         if (mod == 3) {
4271                             tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
4272                         } else {
4273                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
4274                                                 s->mem_index, MO_LEQ);
4275                         }
4276 #else
4277                         goto illegal_op;
4278 #endif
4279                     }
4280                     break;
4281                 case 0x17: /* extractps */
4282                     tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4283                                             xmm_regs[reg].ZMM_L(val & 3)));
4284                     if (mod == 3) {
4285                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4286                     } else {
4287                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4288                                            s->mem_index, MO_LEUL);
4289                     }
4290                     break;
4291                 case 0x20: /* pinsrb */
4292                     if (mod == 3) {
4293                         gen_op_mov_v_reg(s, MO_32, s->T0, rm);
4294                     } else {
4295                         tcg_gen_qemu_ld_tl(s->T0, s->A0,
4296                                            s->mem_index, MO_UB);
4297                     }
4298                     tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
4299                                             xmm_regs[reg].ZMM_B(val & 15)));
4300                     break;
4301                 case 0x21: /* insertps */
4302                     if (mod == 3) {
4303                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4304                                         offsetof(CPUX86State,xmm_regs[rm]
4305                                                 .ZMM_L((val >> 6) & 3)));
4306                     } else {
4307                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4308                                             s->mem_index, MO_LEUL);
4309                     }
4310                     tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4311                                     offsetof(CPUX86State,xmm_regs[reg]
4312                                             .ZMM_L((val >> 4) & 3)));
4313                     if ((val >> 0) & 1)
4314                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4315                                         cpu_env, offsetof(CPUX86State,
4316                                                 xmm_regs[reg].ZMM_L(0)));
4317                     if ((val >> 1) & 1)
4318                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4319                                         cpu_env, offsetof(CPUX86State,
4320                                                 xmm_regs[reg].ZMM_L(1)));
4321                     if ((val >> 2) & 1)
4322                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4323                                         cpu_env, offsetof(CPUX86State,
4324                                                 xmm_regs[reg].ZMM_L(2)));
4325                     if ((val >> 3) & 1)
4326                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4327                                         cpu_env, offsetof(CPUX86State,
4328                                                 xmm_regs[reg].ZMM_L(3)));
4329                     break;
4330                 case 0x22:
4331                     if (ot == MO_32) { /* pinsrd */
4332                         if (mod == 3) {
4333                             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
4334                         } else {
4335                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4336                                                 s->mem_index, MO_LEUL);
4337                         }
4338                         tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4339                                         offsetof(CPUX86State,
4340                                                 xmm_regs[reg].ZMM_L(val & 3)));
4341                     } else { /* pinsrq */
4342 #ifdef TARGET_X86_64
4343                         if (mod == 3) {
4344                             gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
4345                         } else {
4346                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
4347                                                 s->mem_index, MO_LEQ);
4348                         }
4349                         tcg_gen_st_i64(s->tmp1_i64, cpu_env,
4350                                         offsetof(CPUX86State,
4351                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4352 #else
4353                         goto illegal_op;
4354 #endif
4355                     }
4356                     break;
4357                 }
4358                 return;
4359             }
4360 
4361             if (b1) {
4362                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4363                 if (mod == 3) {
4364                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4365                 } else {
4366                     op2_offset = offsetof(CPUX86State,xmm_t0);
4367                     gen_lea_modrm(env, s, modrm);
4368                     gen_ldo_env_A0(s, op2_offset);
4369                 }
4370             } else {
4371                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4372                 if (mod == 3) {
4373                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4374                 } else {
4375                     op2_offset = offsetof(CPUX86State,mmx_t0);
4376                     gen_lea_modrm(env, s, modrm);
4377                     gen_ldq_env_A0(s, op2_offset);
4378                 }
4379             }
4380             val = x86_ldub_code(env, s);
4381 
4382             if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4383                 set_cc_op(s, CC_OP_EFLAGS);
4384 
4385                 if (s->dflag == MO_64) {
4386                     /* The helper must use entire 64-bit gp registers */
4387                     val |= 1 << 8;
4388                 }
4389             }
4390 
4391             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4392             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4393             sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
4394             break;
4395 
4396         case 0x33a:
4397             /* Various integer extensions at 0f 3a f[0-f].  */
4398             b = modrm | (b1 << 8);
4399             modrm = x86_ldub_code(env, s);
4400             reg = ((modrm >> 3) & 7) | REX_R(s);
4401 
4402             switch (b) {
4403             case 0x3f0: /* rorx Gy,Ey, Ib */
4404                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4405                     || !(s->prefix & PREFIX_VEX)
4406                     || s->vex_l != 0) {
4407                     goto illegal_op;
4408                 }
4409                 ot = mo_64_32(s->dflag);
4410                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4411                 b = x86_ldub_code(env, s);
4412                 if (ot == MO_64) {
4413                     tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
4414                 } else {
4415                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4416                     tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
4417                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
4418                 }
4419                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4420                 break;
4421 
4422             default:
4423                 goto unknown_op;
4424             }
4425             break;
4426 
4427         default:
4428         unknown_op:
4429             gen_unknown_opcode(env, s);
4430             return;
4431         }
4432     } else {
4433         /* generic MMX or SSE operation */
4434         switch(b) {
4435         case 0x70: /* pshufx insn */
4436         case 0xc6: /* pshufx insn */
4437         case 0xc2: /* compare insns */
4438             s->rip_offset = 1;
4439             break;
4440         default:
4441             break;
4442         }
4443         if (is_xmm) {
4444             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4445             if (mod != 3) {
4446                 int sz = 4;
4447 
4448                 gen_lea_modrm(env, s, modrm);
4449                 op2_offset = offsetof(CPUX86State,xmm_t0);
4450 
4451                 switch (b) {
4452                 case 0x50 ... 0x5a:
4453                 case 0x5c ... 0x5f:
4454                 case 0xc2:
4455                     /* Most sse scalar operations.  */
4456                     if (b1 == 2) {
4457                         sz = 2;
4458                     } else if (b1 == 3) {
4459                         sz = 3;
4460                     }
4461                     break;
4462 
4463                 case 0x2e:  /* ucomis[sd] */
4464                 case 0x2f:  /* comis[sd] */
4465                     if (b1 == 0) {
4466                         sz = 2;
4467                     } else {
4468                         sz = 3;
4469                     }
4470                     break;
4471                 }
4472 
4473                 switch (sz) {
4474                 case 2:
4475                     /* 32 bit access */
4476                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
4477                     tcg_gen_st32_tl(s->T0, cpu_env,
4478                                     offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4479                     break;
4480                 case 3:
4481                     /* 64 bit access */
4482                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4483                     break;
4484                 default:
4485                     /* 128 bit access */
4486                     gen_ldo_env_A0(s, op2_offset);
4487                     break;
4488                 }
4489             } else {
4490                 rm = (modrm & 7) | REX_B(s);
4491                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4492             }
4493         } else {
4494             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4495             if (mod != 3) {
4496                 gen_lea_modrm(env, s, modrm);
4497                 op2_offset = offsetof(CPUX86State,mmx_t0);
4498                 gen_ldq_env_A0(s, op2_offset);
4499             } else {
4500                 rm = (modrm & 7);
4501                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4502             }
4503         }
4504         switch(b) {
4505         case 0x0f: /* 3DNow! data insns */
4506             val = x86_ldub_code(env, s);
4507             sse_fn_epp = sse_op_table5[val];
4508             if (!sse_fn_epp) {
4509                 goto unknown_op;
4510             }
4511             if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4512                 goto illegal_op;
4513             }
4514             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4515             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4516             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4517             break;
4518         case 0x70: /* pshufx insn */
4519         case 0xc6: /* pshufx insn */
4520             val = x86_ldub_code(env, s);
4521             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4522             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4523             /* XXX: introduce a new table? */
4524             sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4525             sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
4526             break;
4527         case 0xc2:
4528             /* compare insns */
4529             val = x86_ldub_code(env, s);
4530             if (val >= 8)
4531                 goto unknown_op;
4532             sse_fn_epp = sse_op_table4[val][b1];
4533 
4534             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4535             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4536             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4537             break;
4538         case 0xf7:
4539             /* maskmov : we must prepare A0 */
4540             if (mod != 3)
4541                 goto illegal_op;
4542             tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4543             gen_extu(s->aflag, s->A0);
4544             gen_add_A0_ds_seg(s);
4545 
4546             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4547             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4548             /* XXX: introduce a new table? */
4549             sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4550             sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
4551             break;
4552         default:
4553             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4554             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4555             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4556             break;
4557         }
4558         if (b == 0x2e || b == 0x2f) {
4559             set_cc_op(s, CC_OP_EFLAGS);
4560         }
4561     }
4562 }
4563 
4564 /* Convert one instruction.  s->base.is_jmp is set if the translation must
4565    be stopped.  Returns the next pc value. */
4566 static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4567 {
4568     CPUX86State *env = cpu->env_ptr;
4569     int b, prefixes;
4570     int shift;
4571     MemOp ot, aflag, dflag;
4572     int modrm, reg, rm, mod, op, opreg, val;
4573     target_ulong next_eip, tval;
4574     target_ulong pc_start = s->base.pc_next;
4575 
4576     s->pc_start = s->pc = pc_start;
4577     s->override = -1;
4578 #ifdef TARGET_X86_64
4579     s->rex_w = false;
4580     s->rex_r = 0;
4581     s->rex_x = 0;
4582     s->rex_b = 0;
4583 #endif
4584     s->rip_offset = 0; /* for relative ip address */
4585     s->vex_l = 0;
4586     s->vex_v = 0;
4587     if (sigsetjmp(s->jmpbuf, 0) != 0) {
4588         gen_exception_gpf(s);
4589         return s->pc;
4590     }
4591 
4592     prefixes = 0;
4593 
4594  next_byte:
4595     b = x86_ldub_code(env, s);
4596     /* Collect prefixes.  */
4597     switch (b) {
4598     case 0xf3:
4599         prefixes |= PREFIX_REPZ;
4600         goto next_byte;
4601     case 0xf2:
4602         prefixes |= PREFIX_REPNZ;
4603         goto next_byte;
4604     case 0xf0:
4605         prefixes |= PREFIX_LOCK;
4606         goto next_byte;
4607     case 0x2e:
4608         s->override = R_CS;
4609         goto next_byte;
4610     case 0x36:
4611         s->override = R_SS;
4612         goto next_byte;
4613     case 0x3e:
4614         s->override = R_DS;
4615         goto next_byte;
4616     case 0x26:
4617         s->override = R_ES;
4618         goto next_byte;
4619     case 0x64:
4620         s->override = R_FS;
4621         goto next_byte;
4622     case 0x65:
4623         s->override = R_GS;
4624         goto next_byte;
4625     case 0x66:
4626         prefixes |= PREFIX_DATA;
4627         goto next_byte;
4628     case 0x67:
4629         prefixes |= PREFIX_ADR;
4630         goto next_byte;
4631 #ifdef TARGET_X86_64
4632     case 0x40 ... 0x4f:
4633         if (CODE64(s)) {
4634             /* REX prefix */
4635             prefixes |= PREFIX_REX;
4636             s->rex_w = (b >> 3) & 1;
4637             s->rex_r = (b & 0x4) << 1;
4638             s->rex_x = (b & 0x2) << 2;
4639             s->rex_b = (b & 0x1) << 3;
4640             goto next_byte;
4641         }
4642         break;
4643 #endif
4644     case 0xc5: /* 2-byte VEX */
4645     case 0xc4: /* 3-byte VEX */
4646         /* VEX prefixes cannot be used except in 32-bit mode.
4647            Otherwise the instruction is LES or LDS.  */
4648         if (CODE32(s) && !VM86(s)) {
4649             static const int pp_prefix[4] = {
4650                 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4651             };
4652             int vex3, vex2 = x86_ldub_code(env, s);
4653 
4654             if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4655                 /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4656                    otherwise the instruction is LES or LDS.  */
4657                 s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
4658                 break;
4659             }
4660 
4661             /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4662             if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4663                             | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
4664                 goto illegal_op;
4665             }
4666 #ifdef TARGET_X86_64
4667             s->rex_r = (~vex2 >> 4) & 8;
4668 #endif
4669             if (b == 0xc5) {
4670                 /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4671                 vex3 = vex2;
4672                 b = x86_ldub_code(env, s) | 0x100;
4673             } else {
4674                 /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4675                 vex3 = x86_ldub_code(env, s);
4676 #ifdef TARGET_X86_64
4677                 s->rex_x = (~vex2 >> 3) & 8;
4678                 s->rex_b = (~vex2 >> 2) & 8;
4679                 s->rex_w = (vex3 >> 7) & 1;
4680 #endif
4681                 switch (vex2 & 0x1f) {
4682                 case 0x01: /* Implied 0f leading opcode bytes.  */
4683                     b = x86_ldub_code(env, s) | 0x100;
4684                     break;
4685                 case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4686                     b = 0x138;
4687                     break;
4688                 case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4689                     b = 0x13a;
4690                     break;
4691                 default:   /* Reserved for future use.  */
4692                     goto unknown_op;
4693                 }
4694             }
4695             s->vex_v = (~vex3 >> 3) & 0xf;
4696             s->vex_l = (vex3 >> 2) & 1;
4697             prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4698         }
4699         break;
4700     }
4701 
4702     /* Post-process prefixes.  */
4703     if (CODE64(s)) {
4704         /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4705            data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4706            over 0x66 if both are present.  */
4707         dflag = (REX_W(s) ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4708         /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4709         aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4710     } else {
4711         /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4712         if (CODE32(s) ^ ((prefixes & PREFIX_DATA) != 0)) {
4713             dflag = MO_32;
4714         } else {
4715             dflag = MO_16;
4716         }
4717         /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4718         if (CODE32(s) ^ ((prefixes & PREFIX_ADR) != 0)) {
4719             aflag = MO_32;
4720         }  else {
4721             aflag = MO_16;
4722         }
4723     }
4724 
4725     s->prefix = prefixes;
4726     s->aflag = aflag;
4727     s->dflag = dflag;
4728 
4729     /* now check op code */
4730  reswitch:
4731     switch(b) {
4732     case 0x0f:
4733         /**************************/
4734         /* extended op code */
4735         b = x86_ldub_code(env, s) | 0x100;
4736         goto reswitch;
4737 
4738         /**************************/
4739         /* arith & logic */
4740     case 0x00 ... 0x05:
4741     case 0x08 ... 0x0d:
4742     case 0x10 ... 0x15:
4743     case 0x18 ... 0x1d:
4744     case 0x20 ... 0x25:
4745     case 0x28 ... 0x2d:
4746     case 0x30 ... 0x35:
4747     case 0x38 ... 0x3d:
4748         {
4749             int op, f, val;
4750             op = (b >> 3) & 7;
4751             f = (b >> 1) & 3;
4752 
4753             ot = mo_b_d(b, dflag);
4754 
4755             switch(f) {
4756             case 0: /* OP Ev, Gv */
4757                 modrm = x86_ldub_code(env, s);
4758                 reg = ((modrm >> 3) & 7) | REX_R(s);
4759                 mod = (modrm >> 6) & 3;
4760                 rm = (modrm & 7) | REX_B(s);
4761                 if (mod != 3) {
4762                     gen_lea_modrm(env, s, modrm);
4763                     opreg = OR_TMP0;
4764                 } else if (op == OP_XORL && rm == reg) {
4765                 xor_zero:
4766                     /* xor reg, reg optimisation */
4767                     set_cc_op(s, CC_OP_CLR);
4768                     tcg_gen_movi_tl(s->T0, 0);
4769                     gen_op_mov_reg_v(s, ot, reg, s->T0);
4770                     break;
4771                 } else {
4772                     opreg = rm;
4773                 }
4774                 gen_op_mov_v_reg(s, ot, s->T1, reg);
4775                 gen_op(s, op, ot, opreg);
4776                 break;
4777             case 1: /* OP Gv, Ev */
4778                 modrm = x86_ldub_code(env, s);
4779                 mod = (modrm >> 6) & 3;
4780                 reg = ((modrm >> 3) & 7) | REX_R(s);
4781                 rm = (modrm & 7) | REX_B(s);
4782                 if (mod != 3) {
4783                     gen_lea_modrm(env, s, modrm);
4784                     gen_op_ld_v(s, ot, s->T1, s->A0);
4785                 } else if (op == OP_XORL && rm == reg) {
4786                     goto xor_zero;
4787                 } else {
4788                     gen_op_mov_v_reg(s, ot, s->T1, rm);
4789                 }
4790                 gen_op(s, op, ot, reg);
4791                 break;
4792             case 2: /* OP A, Iv */
4793                 val = insn_get(env, s, ot);
4794                 tcg_gen_movi_tl(s->T1, val);
4795                 gen_op(s, op, ot, OR_EAX);
4796                 break;
4797             }
4798         }
4799         break;
4800 
4801     case 0x82:
4802         if (CODE64(s))
4803             goto illegal_op;
4804         /* fall through */
4805     case 0x80: /* GRP1 */
4806     case 0x81:
4807     case 0x83:
4808         {
4809             int val;
4810 
4811             ot = mo_b_d(b, dflag);
4812 
4813             modrm = x86_ldub_code(env, s);
4814             mod = (modrm >> 6) & 3;
4815             rm = (modrm & 7) | REX_B(s);
4816             op = (modrm >> 3) & 7;
4817 
4818             if (mod != 3) {
4819                 if (b == 0x83)
4820                     s->rip_offset = 1;
4821                 else
4822                     s->rip_offset = insn_const_size(ot);
4823                 gen_lea_modrm(env, s, modrm);
4824                 opreg = OR_TMP0;
4825             } else {
4826                 opreg = rm;
4827             }
4828 
4829             switch(b) {
4830             default:
4831             case 0x80:
4832             case 0x81:
4833             case 0x82:
4834                 val = insn_get(env, s, ot);
4835                 break;
4836             case 0x83:
4837                 val = (int8_t)insn_get(env, s, MO_8);
4838                 break;
4839             }
4840             tcg_gen_movi_tl(s->T1, val);
4841             gen_op(s, op, ot, opreg);
4842         }
4843         break;
4844 
4845         /**************************/
4846         /* inc, dec, and other misc arith */
4847     case 0x40 ... 0x47: /* inc Gv */
4848         ot = dflag;
4849         gen_inc(s, ot, OR_EAX + (b & 7), 1);
4850         break;
4851     case 0x48 ... 0x4f: /* dec Gv */
4852         ot = dflag;
4853         gen_inc(s, ot, OR_EAX + (b & 7), -1);
4854         break;
4855     case 0xf6: /* GRP3 */
4856     case 0xf7:
4857         ot = mo_b_d(b, dflag);
4858 
4859         modrm = x86_ldub_code(env, s);
4860         mod = (modrm >> 6) & 3;
4861         rm = (modrm & 7) | REX_B(s);
4862         op = (modrm >> 3) & 7;
4863         if (mod != 3) {
4864             if (op == 0) {
4865                 s->rip_offset = insn_const_size(ot);
4866             }
4867             gen_lea_modrm(env, s, modrm);
4868             /* For those below that handle locked memory, don't load here.  */
4869             if (!(s->prefix & PREFIX_LOCK)
4870                 || op != 2) {
4871                 gen_op_ld_v(s, ot, s->T0, s->A0);
4872             }
4873         } else {
4874             gen_op_mov_v_reg(s, ot, s->T0, rm);
4875         }
4876 
4877         switch(op) {
4878         case 0: /* test */
4879             val = insn_get(env, s, ot);
4880             tcg_gen_movi_tl(s->T1, val);
4881             gen_op_testl_T0_T1_cc(s);
4882             set_cc_op(s, CC_OP_LOGICB + ot);
4883             break;
4884         case 2: /* not */
4885             if (s->prefix & PREFIX_LOCK) {
4886                 if (mod == 3) {
4887                     goto illegal_op;
4888                 }
4889                 tcg_gen_movi_tl(s->T0, ~0);
4890                 tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
4891                                             s->mem_index, ot | MO_LE);
4892             } else {
4893                 tcg_gen_not_tl(s->T0, s->T0);
4894                 if (mod != 3) {
4895                     gen_op_st_v(s, ot, s->T0, s->A0);
4896                 } else {
4897                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4898                 }
4899             }
4900             break;
4901         case 3: /* neg */
4902             if (s->prefix & PREFIX_LOCK) {
4903                 TCGLabel *label1;
4904                 TCGv a0, t0, t1, t2;
4905 
4906                 if (mod == 3) {
4907                     goto illegal_op;
4908                 }
4909                 a0 = tcg_temp_local_new();
4910                 t0 = tcg_temp_local_new();
4911                 label1 = gen_new_label();
4912 
4913                 tcg_gen_mov_tl(a0, s->A0);
4914                 tcg_gen_mov_tl(t0, s->T0);
4915 
4916                 gen_set_label(label1);
4917                 t1 = tcg_temp_new();
4918                 t2 = tcg_temp_new();
4919                 tcg_gen_mov_tl(t2, t0);
4920                 tcg_gen_neg_tl(t1, t0);
4921                 tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4922                                           s->mem_index, ot | MO_LE);
4923                 tcg_temp_free(t1);
4924                 tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4925 
4926                 tcg_temp_free(t2);
4927                 tcg_temp_free(a0);
4928                 tcg_gen_mov_tl(s->T0, t0);
4929                 tcg_temp_free(t0);
4930             } else {
4931                 tcg_gen_neg_tl(s->T0, s->T0);
4932                 if (mod != 3) {
4933                     gen_op_st_v(s, ot, s->T0, s->A0);
4934                 } else {
4935                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4936                 }
4937             }
4938             gen_op_update_neg_cc(s);
4939             set_cc_op(s, CC_OP_SUBB + ot);
4940             break;
4941         case 4: /* mul */
4942             switch(ot) {
4943             case MO_8:
4944                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4945                 tcg_gen_ext8u_tl(s->T0, s->T0);
4946                 tcg_gen_ext8u_tl(s->T1, s->T1);
4947                 /* XXX: use 32 bit mul which could be faster */
4948                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4949                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4950                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4951                 tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
4952                 set_cc_op(s, CC_OP_MULB);
4953                 break;
4954             case MO_16:
4955                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4956                 tcg_gen_ext16u_tl(s->T0, s->T0);
4957                 tcg_gen_ext16u_tl(s->T1, s->T1);
4958                 /* XXX: use 32 bit mul which could be faster */
4959                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4960                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4961                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4962                 tcg_gen_shri_tl(s->T0, s->T0, 16);
4963                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4964                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
4965                 set_cc_op(s, CC_OP_MULW);
4966                 break;
4967             default:
4968             case MO_32:
4969                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4970                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4971                 tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4972                                   s->tmp2_i32, s->tmp3_i32);
4973                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4974                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4975                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4976                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4977                 set_cc_op(s, CC_OP_MULL);
4978                 break;
4979 #ifdef TARGET_X86_64
4980             case MO_64:
4981                 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4982                                   s->T0, cpu_regs[R_EAX]);
4983                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4984                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4985                 set_cc_op(s, CC_OP_MULQ);
4986                 break;
4987 #endif
4988             }
4989             break;
4990         case 5: /* imul */
4991             switch(ot) {
4992             case MO_8:
4993                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4994                 tcg_gen_ext8s_tl(s->T0, s->T0);
4995                 tcg_gen_ext8s_tl(s->T1, s->T1);
4996                 /* XXX: use 32 bit mul which could be faster */
4997                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4998                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4999                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5000                 tcg_gen_ext8s_tl(s->tmp0, s->T0);
5001                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5002                 set_cc_op(s, CC_OP_MULB);
5003                 break;
5004             case MO_16:
5005                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
5006                 tcg_gen_ext16s_tl(s->T0, s->T0);
5007                 tcg_gen_ext16s_tl(s->T1, s->T1);
5008                 /* XXX: use 32 bit mul which could be faster */
5009                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5010                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5011                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5012                 tcg_gen_ext16s_tl(s->tmp0, s->T0);
5013                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5014                 tcg_gen_shri_tl(s->T0, s->T0, 16);
5015                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5016                 set_cc_op(s, CC_OP_MULW);
5017                 break;
5018             default:
5019             case MO_32:
5020                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5021                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
5022                 tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5023                                   s->tmp2_i32, s->tmp3_i32);
5024                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
5025                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
5026                 tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5027                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5028                 tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5029                 tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5030                 set_cc_op(s, CC_OP_MULL);
5031                 break;
5032 #ifdef TARGET_X86_64
5033             case MO_64:
5034                 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
5035                                   s->T0, cpu_regs[R_EAX]);
5036                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5037                 tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
5038                 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
5039                 set_cc_op(s, CC_OP_MULQ);
5040                 break;
5041 #endif
5042             }
5043             break;
5044         case 6: /* div */
5045             switch(ot) {
5046             case MO_8:
5047                 gen_helper_divb_AL(cpu_env, s->T0);
5048                 break;
5049             case MO_16:
5050                 gen_helper_divw_AX(cpu_env, s->T0);
5051                 break;
5052             default:
5053             case MO_32:
5054                 gen_helper_divl_EAX(cpu_env, s->T0);
5055                 break;
5056 #ifdef TARGET_X86_64
5057             case MO_64:
5058                 gen_helper_divq_EAX(cpu_env, s->T0);
5059                 break;
5060 #endif
5061             }
5062             break;
5063         case 7: /* idiv */
5064             switch(ot) {
5065             case MO_8:
5066                 gen_helper_idivb_AL(cpu_env, s->T0);
5067                 break;
5068             case MO_16:
5069                 gen_helper_idivw_AX(cpu_env, s->T0);
5070                 break;
5071             default:
5072             case MO_32:
5073                 gen_helper_idivl_EAX(cpu_env, s->T0);
5074                 break;
5075 #ifdef TARGET_X86_64
5076             case MO_64:
5077                 gen_helper_idivq_EAX(cpu_env, s->T0);
5078                 break;
5079 #endif
5080             }
5081             break;
5082         default:
5083             goto unknown_op;
5084         }
5085         break;
5086 
5087     case 0xfe: /* GRP4 */
5088     case 0xff: /* GRP5 */
5089         ot = mo_b_d(b, dflag);
5090 
5091         modrm = x86_ldub_code(env, s);
5092         mod = (modrm >> 6) & 3;
5093         rm = (modrm & 7) | REX_B(s);
5094         op = (modrm >> 3) & 7;
5095         if (op >= 2 && b == 0xfe) {
5096             goto unknown_op;
5097         }
5098         if (CODE64(s)) {
5099             if (op == 2 || op == 4) {
5100                 /* operand size for jumps is 64 bit */
5101                 ot = MO_64;
5102             } else if (op == 3 || op == 5) {
5103                 ot = dflag != MO_16 ? MO_32 + REX_W(s) : MO_16;
5104             } else if (op == 6) {
5105                 /* default push size is 64 bit */
5106                 ot = mo_pushpop(s, dflag);
5107             }
5108         }
5109         if (mod != 3) {
5110             gen_lea_modrm(env, s, modrm);
5111             if (op >= 2 && op != 3 && op != 5)
5112                 gen_op_ld_v(s, ot, s->T0, s->A0);
5113         } else {
5114             gen_op_mov_v_reg(s, ot, s->T0, rm);
5115         }
5116 
5117         switch(op) {
5118         case 0: /* inc Ev */
5119             if (mod != 3)
5120                 opreg = OR_TMP0;
5121             else
5122                 opreg = rm;
5123             gen_inc(s, ot, opreg, 1);
5124             break;
5125         case 1: /* dec Ev */
5126             if (mod != 3)
5127                 opreg = OR_TMP0;
5128             else
5129                 opreg = rm;
5130             gen_inc(s, ot, opreg, -1);
5131             break;
5132         case 2: /* call Ev */
5133             /* XXX: optimize if memory (no 'and' is necessary) */
5134             if (dflag == MO_16) {
5135                 tcg_gen_ext16u_tl(s->T0, s->T0);
5136             }
5137             next_eip = s->pc - s->cs_base;
5138             tcg_gen_movi_tl(s->T1, next_eip);
5139             gen_push_v(s, s->T1);
5140             gen_op_jmp_v(s->T0);
5141             gen_bnd_jmp(s);
5142             gen_jr(s, s->T0);
5143             break;
5144         case 3: /* lcall Ev */
5145             if (mod == 3) {
5146                 goto illegal_op;
5147             }
5148             gen_op_ld_v(s, ot, s->T1, s->A0);
5149             gen_add_A0_im(s, 1 << ot);
5150             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5151         do_lcall:
5152             if (PE(s) && !VM86(s)) {
5153                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5154                 gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
5155                                            tcg_const_i32(dflag - 1),
5156                                            tcg_const_tl(s->pc - s->cs_base));
5157             } else {
5158                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5159                 gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
5160                                       tcg_const_i32(dflag - 1),
5161                                       tcg_const_i32(s->pc - s->cs_base));
5162             }
5163             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5164             gen_jr(s, s->tmp4);
5165             break;
5166         case 4: /* jmp Ev */
5167             if (dflag == MO_16) {
5168                 tcg_gen_ext16u_tl(s->T0, s->T0);
5169             }
5170             gen_op_jmp_v(s->T0);
5171             gen_bnd_jmp(s);
5172             gen_jr(s, s->T0);
5173             break;
5174         case 5: /* ljmp Ev */
5175             if (mod == 3) {
5176                 goto illegal_op;
5177             }
5178             gen_op_ld_v(s, ot, s->T1, s->A0);
5179             gen_add_A0_im(s, 1 << ot);
5180             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5181         do_ljmp:
5182             if (PE(s) && !VM86(s)) {
5183                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5184                 gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
5185                                           tcg_const_tl(s->pc - s->cs_base));
5186             } else {
5187                 gen_op_movl_seg_T0_vm(s, R_CS);
5188                 gen_op_jmp_v(s->T1);
5189             }
5190             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5191             gen_jr(s, s->tmp4);
5192             break;
5193         case 6: /* push Ev */
5194             gen_push_v(s, s->T0);
5195             break;
5196         default:
5197             goto unknown_op;
5198         }
5199         break;
5200 
5201     case 0x84: /* test Ev, Gv */
5202     case 0x85:
5203         ot = mo_b_d(b, dflag);
5204 
5205         modrm = x86_ldub_code(env, s);
5206         reg = ((modrm >> 3) & 7) | REX_R(s);
5207 
5208         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5209         gen_op_mov_v_reg(s, ot, s->T1, reg);
5210         gen_op_testl_T0_T1_cc(s);
5211         set_cc_op(s, CC_OP_LOGICB + ot);
5212         break;
5213 
5214     case 0xa8: /* test eAX, Iv */
5215     case 0xa9:
5216         ot = mo_b_d(b, dflag);
5217         val = insn_get(env, s, ot);
5218 
5219         gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
5220         tcg_gen_movi_tl(s->T1, val);
5221         gen_op_testl_T0_T1_cc(s);
5222         set_cc_op(s, CC_OP_LOGICB + ot);
5223         break;
5224 
5225     case 0x98: /* CBW/CWDE/CDQE: sign-extend the low half of the accumulator */
5226         switch (dflag) {
5227 #ifdef TARGET_X86_64
5228         case MO_64: /* CDQE: read EAX at MO_32, sign-extend, write back at MO_64 */
5229             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5230             tcg_gen_ext32s_tl(s->T0, s->T0);
5231             gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
5232             break;
5233 #endif
5234         case MO_32: /* CWDE: read AX at MO_16, sign-extend, write back at MO_32 */
5235             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5236             tcg_gen_ext16s_tl(s->T0, s->T0);
5237             gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
5238             break;
5239         case MO_16: /* CBW: read AL at MO_8, sign-extend, write back at MO_16 */
5240             gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
5241             tcg_gen_ext8s_tl(s->T0, s->T0);
5242             gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5243             break;
5244         default: /* no other operand size is handled for this opcode */
5245             tcg_abort();
5246         }
5247         break;
5248     case 0x99: /* CWD/CDQ/CQO: fill (R/E)DX with the sign of the accumulator */
             /*
              * In every size the accumulator is arithmetically shifted right
              * by (width - 1), which yields all-zeros or all-ones, and that
              * value is written to R_EDX at the operand size.
              */
5249         switch (dflag) {
5250 #ifdef TARGET_X86_64
5251         case MO_64: /* CQO */
5252             gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
5253             tcg_gen_sari_tl(s->T0, s->T0, 63);
5254             gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
5255             break;
5256 #endif
5257         case MO_32: /* CDQ */
5258             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
                 /* sign-extend first so bit 31 is replicated by the shift */
5259             tcg_gen_ext32s_tl(s->T0, s->T0);
5260             tcg_gen_sari_tl(s->T0, s->T0, 31);
5261             gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
5262             break;
5263         case MO_16: /* CWD */
5264             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
                 /* sign-extend first so bit 15 is replicated by the shift */
5265             tcg_gen_ext16s_tl(s->T0, s->T0);
5266             tcg_gen_sari_tl(s->T0, s->T0, 15);
5267             gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5268             break;
5269         default: /* no other operand size is handled for this opcode */
5270             tcg_abort();
5271         }
5272         break;
5273     case 0x1af: /* imul Gv, Ev */
5274     case 0x69: /* imul Gv, Ev, I */
5275     case 0x6b:
5276         ot = dflag;
5277         modrm = x86_ldub_code(env, s);
5278         reg = ((modrm >> 3) & 7) | REX_R(s);
5279         if (b == 0x69)
5280             s->rip_offset = insn_const_size(ot);
5281         else if (b == 0x6b)
5282             s->rip_offset = 1;
5283         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5284         if (b == 0x69) {
5285             val = insn_get(env, s, ot);
5286             tcg_gen_movi_tl(s->T1, val);
5287         } else if (b == 0x6b) {
5288             val = (int8_t)insn_get(env, s, MO_8);
5289             tcg_gen_movi_tl(s->T1, val);
5290         } else {
5291             gen_op_mov_v_reg(s, ot, s->T1, reg);
5292         }
5293         switch (ot) {
5294 #ifdef TARGET_X86_64
5295         case MO_64:
5296             tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
5297             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5298             tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5299             tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
5300             break;
5301 #endif
5302         case MO_32:
5303             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5304             tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
5305             tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5306                               s->tmp2_i32, s->tmp3_i32);
5307             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
5308             tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5309             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5310             tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5311             tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5312             break;
5313         default:
5314             tcg_gen_ext16s_tl(s->T0, s->T0);
5315             tcg_gen_ext16s_tl(s->T1, s->T1);
5316             /* XXX: use 32 bit mul which could be faster */
5317             tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5318             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5319             tcg_gen_ext16s_tl(s->tmp0, s->T0);
5320             tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5321             gen_op_mov_reg_v(s, ot, reg, s->T0);
5322             break;
5323         }
5324         set_cc_op(s, CC_OP_MULB + ot);
5325         break;
5326     case 0x1c0:
5327     case 0x1c1: /* xadd Ev, Gv */
5328         ot = mo_b_d(b, dflag);
5329         modrm = x86_ldub_code(env, s);
5330         reg = ((modrm >> 3) & 7) | REX_R(s);
5331         mod = (modrm >> 6) & 3;
5332         gen_op_mov_v_reg(s, ot, s->T0, reg);
5333         if (mod == 3) {
5334             rm = (modrm & 7) | REX_B(s);
5335             gen_op_mov_v_reg(s, ot, s->T1, rm);
5336             tcg_gen_add_tl(s->T0, s->T0, s->T1);
5337             gen_op_mov_reg_v(s, ot, reg, s->T1);
5338             gen_op_mov_reg_v(s, ot, rm, s->T0);
5339         } else {
5340             gen_lea_modrm(env, s, modrm);
5341             if (s->prefix & PREFIX_LOCK) {
5342                 tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
5343                                             s->mem_index, ot | MO_LE);
5344                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5345             } else {
5346                 gen_op_ld_v(s, ot, s->T1, s->A0);
5347                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5348                 gen_op_st_v(s, ot, s->T0, s->A0);
5349             }
5350             gen_op_mov_reg_v(s, ot, reg, s->T1);
5351         }
5352         gen_op_update2_cc(s);
5353         set_cc_op(s, CC_OP_ADDB + ot);
5354         break;
5355     case 0x1b0:
5356     case 0x1b1: /* cmpxchg Ev, Gv */
5357         {
5358             TCGv oldv, newv, cmpv;
5359 
5360             ot = mo_b_d(b, dflag);
5361             modrm = x86_ldub_code(env, s);
5362             reg = ((modrm >> 3) & 7) | REX_R(s);
5363             mod = (modrm >> 6) & 3;
5364             oldv = tcg_temp_new();
5365             newv = tcg_temp_new();
5366             cmpv = tcg_temp_new();
5367             gen_op_mov_v_reg(s, ot, newv, reg);
5368             tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5369 
5370             if (s->prefix & PREFIX_LOCK) {
5371                 if (mod == 3) {
5372                     goto illegal_op;
5373                 }
5374                 gen_lea_modrm(env, s, modrm);
5375                 tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
5376                                           s->mem_index, ot | MO_LE);
5377                 gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5378             } else {
5379                 if (mod == 3) {
5380                     rm = (modrm & 7) | REX_B(s);
5381                     gen_op_mov_v_reg(s, ot, oldv, rm);
5382                 } else {
5383                     gen_lea_modrm(env, s, modrm);
5384                     gen_op_ld_v(s, ot, oldv, s->A0);
5385                     rm = 0; /* avoid warning */
5386                 }
5387                 gen_extu(ot, oldv);
5388                 gen_extu(ot, cmpv);
5389                 /* store value = (old == cmp ? new : old);  */
5390                 tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5391                 if (mod == 3) {
5392                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5393                     gen_op_mov_reg_v(s, ot, rm, newv);
5394                 } else {
5395                     /* Perform an unconditional store cycle like physical cpu;
5396                        must be before changing accumulator to ensure
5397                        idempotency if the store faults and the instruction
5398                        is restarted */
5399                     gen_op_st_v(s, ot, newv, s->A0);
5400                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5401                 }
5402             }
5403             tcg_gen_mov_tl(cpu_cc_src, oldv);
5404             tcg_gen_mov_tl(s->cc_srcT, cmpv);
5405             tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5406             set_cc_op(s, CC_OP_SUBB + ot);
5407             tcg_temp_free(oldv);
5408             tcg_temp_free(newv);
5409             tcg_temp_free(cmpv);
5410         }
5411         break;
5412     case 0x1c7: /* cmpxchg8b/cmpxchg16b, rdrand, rdseed (selected by modrm reg) */
5413         modrm = x86_ldub_code(env, s);
5414         mod = (modrm >> 6) & 3;
5415         switch ((modrm >> 3) & 7) {
5416         case 1: /* CMPXCHG8, CMPXCHG16 */
                 /* Only a memory operand is valid. */
5417             if (mod == 3) {
5418                 goto illegal_op;
5419             }
5420 #ifdef TARGET_X86_64
                 /* 64-bit operand size selects the 16-byte variant. */
5421             if (dflag == MO_64) {
5422                 if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
5423                     goto illegal_op;
5424                 }
5425                 gen_lea_modrm(env, s, modrm);
                     /*
                      * The non-"unlocked" helper is used only when the insn has
                      * a LOCK prefix and the TB was built with CF_PARALLEL.
                      */
5426                 if ((s->prefix & PREFIX_LOCK) &&
5427                     (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5428                     gen_helper_cmpxchg16b(cpu_env, s->A0);
5429                 } else {
5430                     gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
5431                 }
                     /* presumably the helper leaves the flags in EFLAGS form */
5432                 set_cc_op(s, CC_OP_EFLAGS);
5433                 break;
5434             }
5435 #endif
                 /* 8-byte variant, gated on the CX8 feature bit. */
5436             if (!(s->cpuid_features & CPUID_CX8)) {
5437                 goto illegal_op;
5438             }
5439             gen_lea_modrm(env, s, modrm);
                 /* Same helper selection rule as the 16-byte variant. */
5440             if ((s->prefix & PREFIX_LOCK) &&
5441                 (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5442                 gen_helper_cmpxchg8b(cpu_env, s->A0);
5443             } else {
5444                 gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
5445             }
5446             set_cc_op(s, CC_OP_EFLAGS);
5447             break;
5448
                 /*
                  * NOTE(review): RDSEED (/7) shares RDRAND's CPUID gate
                  * (CPUID_EXT_RDRAND) and helper (gen_helper_rdrand); no
                  * separate RDSEED feature check is visible here -- confirm
                  * that this is intended.
                  */
5449         case 7: /* RDSEED */
5450         case 6: /* RDRAND */
                 /* Register operand only; LOCK/REP/REPNE prefixes are rejected. */
5451             if (mod != 3 ||
5452                 (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
5453                 !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) {
5454                 goto illegal_op;
5455             }
5456             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5457                 gen_io_start();
5458             }
5459             gen_helper_rdrand(s->T0, cpu_env);
5460             rm = (modrm & 7) | REX_B(s);
5461             gen_op_mov_reg_v(s, dflag, rm, s->T0);
5462             set_cc_op(s, CC_OP_EFLAGS);
                 /* Under icount, jump to the next instruction after the helper. */
5463             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5464                 gen_jmp(s, s->pc - s->cs_base);
5465             }
5466             break;
5467
5468         default:
5469             goto illegal_op;
5470         }
5471         break;
5472 
5473         /**************************/
5474         /* push/pop */
5475     case 0x50 ... 0x57: /* push */
5476         gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
5477         gen_push_v(s, s->T0);
5478         break;
5479     case 0x58 ... 0x5f: /* pop */
5480         ot = gen_pop_T0(s);
5481         /* NOTE: order is important for pop %sp */
5482         gen_pop_update(s, ot);
5483         gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
5484         break;
5485     case 0x60: /* pusha */
5486         if (CODE64(s))
5487             goto illegal_op;
5488         gen_pusha(s);
5489         break;
5490     case 0x61: /* popa */
5491         if (CODE64(s))
5492             goto illegal_op;
5493         gen_popa(s);
5494         break;
5495     case 0x68: /* push Iv */
5496     case 0x6a:
5497         ot = mo_pushpop(s, dflag);
5498         if (b == 0x68)
5499             val = insn_get(env, s, ot);
5500         else
5501             val = (int8_t)insn_get(env, s, MO_8);
5502         tcg_gen_movi_tl(s->T0, val);
5503         gen_push_v(s, s->T0);
5504         break;
5505     case 0x8f: /* pop Ev */
5506         modrm = x86_ldub_code(env, s);
5507         mod = (modrm >> 6) & 3;
5508         ot = gen_pop_T0(s);
5509         if (mod == 3) {
5510             /* NOTE: order is important for pop %sp */
5511             gen_pop_update(s, ot);
5512             rm = (modrm & 7) | REX_B(s);
5513             gen_op_mov_reg_v(s, ot, rm, s->T0);
5514         } else {
5515             /* NOTE: order is important too for MMU exceptions */
5516             s->popl_esp_hack = 1 << ot;
5517             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5518             s->popl_esp_hack = 0;
5519             gen_pop_update(s, ot);
5520         }
5521         break;
5522     case 0xc8: /* enter */
5523         {
5524             int level;
5525             val = x86_lduw_code(env, s);
5526             level = x86_ldub_code(env, s);
5527             gen_enter(s, val, level);
5528         }
5529         break;
5530     case 0xc9: /* leave */
5531         gen_leave(s);
5532         break;
5533     case 0x06: /* push es */
5534     case 0x0e: /* push cs */
5535     case 0x16: /* push ss */
5536     case 0x1e: /* push ds */
5537         if (CODE64(s))
5538             goto illegal_op;
5539         gen_op_movl_T0_seg(s, b >> 3);
5540         gen_push_v(s, s->T0);
5541         break;
5542     case 0x1a0: /* push fs */
5543     case 0x1a8: /* push gs */
5544         gen_op_movl_T0_seg(s, (b >> 3) & 7);
5545         gen_push_v(s, s->T0);
5546         break;
5547     case 0x07: /* pop es */
5548     case 0x17: /* pop ss */
5549     case 0x1f: /* pop ds */
5550         if (CODE64(s))
5551             goto illegal_op;
5552         reg = b >> 3;
5553         ot = gen_pop_T0(s);
5554         gen_movl_seg_T0(s, reg);
5555         gen_pop_update(s, ot);
5556         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5557         if (s->base.is_jmp) {
5558             gen_jmp_im(s, s->pc - s->cs_base);
5559             if (reg == R_SS) {
5560                 s->flags &= ~HF_TF_MASK;
5561                 gen_eob_inhibit_irq(s, true);
5562             } else {
5563                 gen_eob(s);
5564             }
5565         }
5566         break;
5567     case 0x1a1: /* pop fs */
5568     case 0x1a9: /* pop gs */
5569         ot = gen_pop_T0(s);
5570         gen_movl_seg_T0(s, (b >> 3) & 7);
5571         gen_pop_update(s, ot);
5572         if (s->base.is_jmp) {
5573             gen_jmp_im(s, s->pc - s->cs_base);
5574             gen_eob(s);
5575         }
5576         break;
5577 
5578         /**************************/
5579         /* mov */
5580     case 0x88:
5581     case 0x89: /* mov Ev, Gv (destination first): register -> modrm operand */
5582         ot = mo_b_d(b, dflag);
5583         modrm = x86_ldub_code(env, s);
5584         reg = ((modrm >> 3) & 7) | REX_R(s);
5585
5586         /* generate a generic store */
5587         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5588         break;
5589     case 0xc6:
5590     case 0xc7: /* mov Ev, Iv */
5591         ot = mo_b_d(b, dflag);
5592         modrm = x86_ldub_code(env, s);
5593         mod = (modrm >> 6) & 3;
5594         if (mod != 3) {
5595             s->rip_offset = insn_const_size(ot);
5596             gen_lea_modrm(env, s, modrm);
5597         }
5598         val = insn_get(env, s, ot);
5599         tcg_gen_movi_tl(s->T0, val);
5600         if (mod != 3) {
5601             gen_op_st_v(s, ot, s->T0, s->A0);
5602         } else {
5603             gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
5604         }
5605         break;
5606     case 0x8a:
5607     case 0x8b: /* mov Gv, Ev (destination first): modrm operand -> register */
5608         ot = mo_b_d(b, dflag);
5609         modrm = x86_ldub_code(env, s);
5610         reg = ((modrm >> 3) & 7) | REX_R(s);
5611
             /* fetch the modrm operand into T0, then copy T0 into 'reg' */
5612         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5613         gen_op_mov_reg_v(s, ot, reg, s->T0);
5614         break;
5615     case 0x8e: /* mov seg, Ew: load a segment register from a 16-bit operand */
5616         modrm = x86_ldub_code(env, s);
             /* the reg field selects the segment register; no REX extension is applied */
5617         reg = (modrm >> 3) & 7;
             /* encodings 6 and 7 are rejected, and CS may not be loaded by mov */
5618         if (reg >= 6 || reg == R_CS)
5619             goto illegal_op;
             /* the source operand is always fetched at MO_16, whatever dflag is */
5620         gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5621         gen_movl_seg_T0(s, reg);
5622         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5623         if (s->base.is_jmp) {
                 /* the block ends here; continue at the next instruction */
5624             gen_jmp_im(s, s->pc - s->cs_base);
5625             if (reg == R_SS) {
                     /*
                      * NOTE(review): presumably models the one-instruction
                      * interrupt/single-step shadow after a load of SS --
                      * confirm against gen_eob_inhibit_irq().
                      */
5626                 s->flags &= ~HF_TF_MASK;
5627                 gen_eob_inhibit_irq(s, true);
5628             } else {
5629                 gen_eob(s);
5630             }
5631         }
5632         break;
5633     case 0x8c: /* mov Ev, seg: store a segment register to the modrm operand */
5634         modrm = x86_ldub_code(env, s);
5635         reg = (modrm >> 3) & 7;
5636         mod = (modrm >> 6) & 3;
             /* encodings 6 and 7 are rejected */
5637         if (reg >= 6)
5638             goto illegal_op;
5639         gen_op_movl_T0_seg(s, reg);
             /* a register destination uses the operand size; memory is always 16-bit */
5640         ot = mod == 3 ? dflag : MO_16;
5641         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5642         break;
5643 
5644     case 0x1b6: /* movzbS Gv, Eb */
5645     case 0x1b7: /* movzwS Gv, Ew */
5646     case 0x1be: /* movsbS Gv, Eb */
5647     case 0x1bf: /* movswS Gv, Ew */
             /*
              * Zero- or sign-extending move.  Bit 0 of the opcode selects a
              * byte or word source; bit 3 selects sign extension.
              */
5648         {
5649             MemOp d_ot;
5650             MemOp s_ot;
5651
5652             /* d_ot is the size of destination */
5653             d_ot = dflag;
5654             /* ot is the size of source */
5655             ot = (b & 1) + MO_8;
5656             /* s_ot is the sign+size of source */
5657             s_ot = b & 8 ? MO_SIGN | ot : ot;
5658
5659             modrm = x86_ldub_code(env, s);
5660             reg = ((modrm >> 3) & 7) | REX_R(s);
5661             mod = (modrm >> 6) & 3;
5662             rm = (modrm & 7) | REX_B(s);
5663
5664             if (mod == 3) {
                     /*
                      * Register source.  For a signed byte taken from a
                      * high-byte register (as reported by byte_reg_is_xH),
                      * sign-extract bits 8..15 of cpu_regs[rm - 4] in one op.
                      */
5665                 if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
5666                     tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
5667                 } else {
5668                     gen_op_mov_v_reg(s, ot, s->T0, rm);
                         /* extend T0 according to the source size and signedness */
5669                     switch (s_ot) {
5670                     case MO_UB:
5671                         tcg_gen_ext8u_tl(s->T0, s->T0);
5672                         break;
5673                     case MO_SB:
5674                         tcg_gen_ext8s_tl(s->T0, s->T0);
5675                         break;
5676                     case MO_UW:
5677                         tcg_gen_ext16u_tl(s->T0, s->T0);
5678                         break;
5679                     default:
5680                     case MO_SW:
5681                         tcg_gen_ext16s_tl(s->T0, s->T0);
5682                         break;
5683                     }
5684                 }
5685                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5686             } else {
                     /* Memory source: the load is issued with s_ot (size + sign). */
5687                 gen_lea_modrm(env, s, modrm);
5688                 gen_op_ld_v(s, s_ot, s->T0, s->A0);
5689                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5690             }
5691         }
5692         break;
5693 
5694     case 0x8d: /* lea */
5695         modrm = x86_ldub_code(env, s);
5696         mod = (modrm >> 6) & 3;
5697         if (mod == 3)
5698             goto illegal_op;
5699         reg = ((modrm >> 3) & 7) | REX_R(s);
5700         {
5701             AddressParts a = gen_lea_modrm_0(env, s, modrm);
5702             TCGv ea = gen_lea_modrm_1(s, a);
5703             gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5704             gen_op_mov_reg_v(s, dflag, reg, s->A0);
5705         }
5706         break;
5707 
5708     case 0xa0: /* mov EAX, Ov */
5709     case 0xa1:
5710     case 0xa2: /* mov Ov, EAX */
5711     case 0xa3:
5712         {
5713             target_ulong offset_addr;
5714 
5715             ot = mo_b_d(b, dflag);
5716             switch (s->aflag) {
5717 #ifdef TARGET_X86_64
5718             case MO_64:
5719                 offset_addr = x86_ldq_code(env, s);
5720                 break;
5721 #endif
5722             default:
5723                 offset_addr = insn_get(env, s, s->aflag);
5724                 break;
5725             }
5726             tcg_gen_movi_tl(s->A0, offset_addr);
5727             gen_add_A0_ds_seg(s);
5728             if ((b & 2) == 0) {
5729                 gen_op_ld_v(s, ot, s->T0, s->A0);
5730                 gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
5731             } else {
5732                 gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
5733                 gen_op_st_v(s, ot, s->T0, s->A0);
5734             }
5735         }
5736         break;
5737     case 0xd7: /* xlat: AL = byte at [(E/R)BX + zero-extended AL] */
             /* A0 = EBX + (AL zero-extended from 8 bits) */
5738         tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
5739         tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
5740         tcg_gen_add_tl(s->A0, s->A0, s->T0);
             /* zero-extend the sum from the current address size */
5741         gen_extu(s->aflag, s->A0);
             /* presumably adds the DS (or overriding) segment base -- confirm */
5742         gen_add_A0_ds_seg(s);
             /* load one byte and write it to the low byte of the accumulator */
5743         gen_op_ld_v(s, MO_8, s->T0, s->A0);
5744         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
5745         break;
5746     case 0xb0 ... 0xb7: /* mov R, Ib */
5747         val = insn_get(env, s, MO_8);
5748         tcg_gen_movi_tl(s->T0, val);
5749         gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
5750         break;
5751     case 0xb8 ... 0xbf: /* mov R, Iv */
5752 #ifdef TARGET_X86_64
5753         if (dflag == MO_64) {
5754             uint64_t tmp;
5755             /* 64 bit case */
5756             tmp = x86_ldq_code(env, s);
5757             reg = (b & 7) | REX_B(s);
5758             tcg_gen_movi_tl(s->T0, tmp);
5759             gen_op_mov_reg_v(s, MO_64, reg, s->T0);
5760         } else
5761 #endif
5762         {
5763             ot = dflag;
5764             val = insn_get(env, s, ot);
5765             reg = (b & 7) | REX_B(s);
5766             tcg_gen_movi_tl(s->T0, val);
5767             gen_op_mov_reg_v(s, ot, reg, s->T0);
5768         }
5769         break;
5770 
5771     case 0x91 ... 0x97: /* xchg R, EAX */
5772     do_xchg_reg_eax:
5773         ot = dflag;
5774         reg = (b & 7) | REX_B(s);
5775         rm = R_EAX;
5776         goto do_xchg_reg;
5777     case 0x86:
5778     case 0x87: /* xchg Ev, Gv */
5779         ot = mo_b_d(b, dflag);
5780         modrm = x86_ldub_code(env, s);
5781         reg = ((modrm >> 3) & 7) | REX_R(s);
5782         mod = (modrm >> 6) & 3;
5783         if (mod == 3) {
5784             rm = (modrm & 7) | REX_B(s);
5785         do_xchg_reg:
5786             gen_op_mov_v_reg(s, ot, s->T0, reg);
5787             gen_op_mov_v_reg(s, ot, s->T1, rm);
5788             gen_op_mov_reg_v(s, ot, rm, s->T0);
5789             gen_op_mov_reg_v(s, ot, reg, s->T1);
5790         } else {
5791             gen_lea_modrm(env, s, modrm);
5792             gen_op_mov_v_reg(s, ot, s->T0, reg);
5793             /* for xchg, lock is implicit */
5794             tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
5795                                    s->mem_index, ot | MO_LE);
5796             gen_op_mov_reg_v(s, ot, reg, s->T1);
5797         }
5798         break;
5799     case 0xc4: /* les Gv */
5800         /* In CODE64 this is VEX3; see above.  */
5801         op = R_ES;
5802         goto do_lxx;
5803     case 0xc5: /* lds Gv */
5804         /* In CODE64 this is VEX2; see above.  */
5805         op = R_DS;
5806         goto do_lxx;
5807     case 0x1b2: /* lss Gv */
5808         op = R_SS;
5809         goto do_lxx;
5810     case 0x1b4: /* lfs Gv */
5811         op = R_FS;
5812         goto do_lxx;
5813     case 0x1b5: /* lgs Gv */
5814         op = R_GS;
5815     do_lxx:
5816         ot = dflag != MO_16 ? MO_32 : MO_16;
5817         modrm = x86_ldub_code(env, s);
5818         reg = ((modrm >> 3) & 7) | REX_R(s);
5819         mod = (modrm >> 6) & 3;
5820         if (mod == 3)
5821             goto illegal_op;
5822         gen_lea_modrm(env, s, modrm);
5823         gen_op_ld_v(s, ot, s->T1, s->A0);
5824         gen_add_A0_im(s, 1 << ot);
5825         /* load the segment first to handle exceptions properly */
5826         gen_op_ld_v(s, MO_16, s->T0, s->A0);
5827         gen_movl_seg_T0(s, op);
5828         /* then put the data */
5829         gen_op_mov_reg_v(s, ot, reg, s->T1);
5830         if (s->base.is_jmp) {
5831             gen_jmp_im(s, s->pc - s->cs_base);
5832             gen_eob(s);
5833         }
5834         break;
5835 
5836         /************************/
5837         /* shifts */
5838     case 0xc0:
5839     case 0xc1:
5840         /* shift Ev,Ib */
             /*
              * 'shift' selects where the count comes from until it is decoded:
              *   2 = an imm8 follows the modrm bytes,
              *   1 = constant count of 1,
              *   0 = count taken from CL.
              */
5841         shift = 2;
5842     grp2:
5843         {
5844             ot = mo_b_d(b, dflag);
5845             modrm = x86_ldub_code(env, s);
5846             mod = (modrm >> 6) & 3;
                 /* the modrm reg field selects which shift/rotate operation */
5847             op = (modrm >> 3) & 7;
5848
5849             if (mod != 3) {
                     /*
                      * One immediate byte still follows the modrm bytes;
                      * presumably rip_offset lets RIP-relative addressing
                      * account for it -- confirm in gen_lea_modrm().
                      */
5850                 if (shift == 2) {
5851                     s->rip_offset = 1;
5852                 }
5853                 gen_lea_modrm(env, s, modrm);
5854                 opreg = OR_TMP0;
5855             } else {
5856                 opreg = (modrm & 7) | REX_B(s);
5857             }
5858
5859             /* simpler op */
5860             if (shift == 0) {
                     /* variable count in CL */
5861                 gen_shift(s, op, ot, opreg, OR_ECX);
5862             } else {
                     /* for the imm8 form, replace the selector with the real count */
5863                 if (shift == 2) {
5864                     shift = x86_ldub_code(env, s);
5865                 }
5866                 gen_shifti(s, op, ot, opreg, shift);
5867             }
5868         }
5869         break;
5870     case 0xd0:
5871     case 0xd1:
5872         /* shift Ev,1 */
5873         shift = 1;
5874         goto grp2;
5875     case 0xd2:
5876     case 0xd3:
5877         /* shift Ev,cl */
5878         shift = 0;
5879         goto grp2;
5880 
5881     case 0x1a4: /* shld imm */
5882         op = 0;
5883         shift = 1;
5884         goto do_shiftd;
5885     case 0x1a5: /* shld cl */
5886         op = 0;
5887         shift = 0;
5888         goto do_shiftd;
5889     case 0x1ac: /* shrd imm */
5890         op = 1;
5891         shift = 1;
5892         goto do_shiftd;
5893     case 0x1ad: /* shrd cl */
5894         op = 1;
5895         shift = 0;
5896     do_shiftd:
5897         ot = dflag;
5898         modrm = x86_ldub_code(env, s);
5899         mod = (modrm >> 6) & 3;
5900         rm = (modrm & 7) | REX_B(s);
5901         reg = ((modrm >> 3) & 7) | REX_R(s);
5902         if (mod != 3) {
5903             gen_lea_modrm(env, s, modrm);
5904             opreg = OR_TMP0;
5905         } else {
5906             opreg = rm;
5907         }
5908         gen_op_mov_v_reg(s, ot, s->T1, reg);
5909 
5910         if (shift) {
5911             TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
5912             gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5913             tcg_temp_free(imm);
5914         } else {
5915             gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5916         }
5917         break;
5918 
5919         /************************/
5920         /* floats */
5921     case 0xd8 ... 0xdf:
5922         {
5923             bool update_fip = true;
5924 
5925             if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5926                 /* if CR0.EM or CR0.TS are set, generate an FPU exception */
5927                 /* XXX: what to do if illegal op ? */
5928                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5929                 break;
5930             }
5931             modrm = x86_ldub_code(env, s);
5932             mod = (modrm >> 6) & 3;
5933             rm = modrm & 7;
5934             op = ((b & 7) << 3) | ((modrm >> 3) & 7);
5935             if (mod != 3) {
5936                 /* memory op */
5937                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
5938                 TCGv ea = gen_lea_modrm_1(s, a);
5939                 TCGv last_addr = tcg_temp_new();
5940                 bool update_fdp = true;
5941 
5942                 tcg_gen_mov_tl(last_addr, ea);
5943                 gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
5944 
5945                 switch (op) {
5946                 case 0x00 ... 0x07: /* fxxxs */
5947                 case 0x10 ... 0x17: /* fixxxl */
5948                 case 0x20 ... 0x27: /* fxxxl */
5949                 case 0x30 ... 0x37: /* fixxx */
5950                     {
5951                         int op1;
5952                         op1 = op & 7;
5953 
5954                         switch (op >> 4) {
5955                         case 0:
5956                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5957                                                 s->mem_index, MO_LEUL);
5958                             gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
5959                             break;
5960                         case 1:
5961                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5962                                                 s->mem_index, MO_LEUL);
5963                             gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5964                             break;
5965                         case 2:
5966                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5967                                                 s->mem_index, MO_LEQ);
5968                             gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
5969                             break;
5970                         case 3:
5971                         default:
5972                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5973                                                 s->mem_index, MO_LESW);
5974                             gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5975                             break;
5976                         }
5977 
5978                         gen_helper_fp_arith_ST0_FT0(op1);
5979                         if (op1 == 3) {
5980                             /* fcomp needs pop */
5981                             gen_helper_fpop(cpu_env);
5982                         }
5983                     }
5984                     break;
5985                 case 0x08: /* flds */
5986                 case 0x0a: /* fsts */
5987                 case 0x0b: /* fstps */
5988                 case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5989                 case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5990                 case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5991                     switch (op & 7) {
5992                     case 0:
5993                         switch (op >> 4) {
5994                         case 0:
5995                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5996                                                 s->mem_index, MO_LEUL);
5997                             gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
5998                             break;
5999                         case 1:
6000                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6001                                                 s->mem_index, MO_LEUL);
6002                             gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
6003                             break;
6004                         case 2:
6005                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
6006                                                 s->mem_index, MO_LEQ);
6007                             gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
6008                             break;
6009                         case 3:
6010                         default:
6011                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6012                                                 s->mem_index, MO_LESW);
6013                             gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
6014                             break;
6015                         }
6016                         break;
6017                     case 1:
6018                         /* XXX: the corresponding CPUID bit must be tested ! */
6019                         switch (op >> 4) {
6020                         case 1:
6021                             gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
6022                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6023                                                 s->mem_index, MO_LEUL);
6024                             break;
6025                         case 2:
6026                             gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
6027                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6028                                                 s->mem_index, MO_LEQ);
6029                             break;
6030                         case 3:
6031                         default:
6032                             gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
6033                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6034                                                 s->mem_index, MO_LEUW);
6035                             break;
6036                         }
6037                         gen_helper_fpop(cpu_env);
6038                         break;
6039                     default:
6040                         switch (op >> 4) {
6041                         case 0:
6042                             gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
6043                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6044                                                 s->mem_index, MO_LEUL);
6045                             break;
6046                         case 1:
6047                             gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
6048                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6049                                                 s->mem_index, MO_LEUL);
6050                             break;
6051                         case 2:
6052                             gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
6053                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6054                                                 s->mem_index, MO_LEQ);
6055                             break;
6056                         case 3:
6057                         default:
6058                             gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
6059                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6060                                                 s->mem_index, MO_LEUW);
6061                             break;
6062                         }
6063                         if ((op & 7) == 3) {
6064                             gen_helper_fpop(cpu_env);
6065                         }
6066                         break;
6067                     }
6068                     break;
6069                 case 0x0c: /* fldenv mem */
6070                     gen_helper_fldenv(cpu_env, s->A0,
6071                                       tcg_const_i32(dflag - 1));
6072                     update_fip = update_fdp = false;
6073                     break;
6074                 case 0x0d: /* fldcw mem */
6075                     tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6076                                         s->mem_index, MO_LEUW);
6077                     gen_helper_fldcw(cpu_env, s->tmp2_i32);
6078                     update_fip = update_fdp = false;
6079                     break;
6080                 case 0x0e: /* fnstenv mem */
6081                     gen_helper_fstenv(cpu_env, s->A0,
6082                                       tcg_const_i32(dflag - 1));
6083                     update_fip = update_fdp = false;
6084                     break;
6085                 case 0x0f: /* fnstcw mem */
6086                     gen_helper_fnstcw(s->tmp2_i32, cpu_env);
6087                     tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6088                                         s->mem_index, MO_LEUW);
6089                     update_fip = update_fdp = false;
6090                     break;
6091                 case 0x1d: /* fldt mem */
6092                     gen_helper_fldt_ST0(cpu_env, s->A0);
6093                     break;
6094                 case 0x1f: /* fstpt mem */
6095                     gen_helper_fstt_ST0(cpu_env, s->A0);
6096                     gen_helper_fpop(cpu_env);
6097                     break;
6098                 case 0x2c: /* frstor mem */
6099                     gen_helper_frstor(cpu_env, s->A0,
6100                                       tcg_const_i32(dflag - 1));
6101                     update_fip = update_fdp = false;
6102                     break;
6103                 case 0x2e: /* fnsave mem */
6104                     gen_helper_fsave(cpu_env, s->A0,
6105                                      tcg_const_i32(dflag - 1));
6106                     update_fip = update_fdp = false;
6107                     break;
6108                 case 0x2f: /* fnstsw mem */
6109                     gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6110                     tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6111                                         s->mem_index, MO_LEUW);
6112                     update_fip = update_fdp = false;
6113                     break;
6114                 case 0x3c: /* fbld */
6115                     gen_helper_fbld_ST0(cpu_env, s->A0);
6116                     break;
6117                 case 0x3e: /* fbstp */
6118                     gen_helper_fbst_ST0(cpu_env, s->A0);
6119                     gen_helper_fpop(cpu_env);
6120                     break;
6121                 case 0x3d: /* fildll */
6122                     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
6123                                         s->mem_index, MO_LEQ);
6124                     gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
6125                     break;
6126                 case 0x3f: /* fistpll */
6127                     gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
6128                     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6129                                         s->mem_index, MO_LEQ);
6130                     gen_helper_fpop(cpu_env);
6131                     break;
6132                 default:
6133                     goto unknown_op;
6134                 }
6135 
6136                 if (update_fdp) {
6137                     int last_seg = s->override >= 0 ? s->override : a.def_seg;
6138 
6139                     tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
6140                                    offsetof(CPUX86State,
6141                                             segs[last_seg].selector));
6142                     tcg_gen_st16_i32(s->tmp2_i32, cpu_env,
6143                                      offsetof(CPUX86State, fpds));
6144                     tcg_gen_st_tl(last_addr, cpu_env,
6145                                   offsetof(CPUX86State, fpdp));
6146                 }
6147                 tcg_temp_free(last_addr);
6148             } else {
6149                 /* register float ops */
6150                 opreg = rm;
6151 
6152                 switch (op) {
6153                 case 0x08: /* fld sti */
6154                     gen_helper_fpush(cpu_env);
6155                     gen_helper_fmov_ST0_STN(cpu_env,
6156                                             tcg_const_i32((opreg + 1) & 7));
6157                     break;
6158                 case 0x09: /* fxchg sti */
6159                 case 0x29: /* fxchg4 sti, undocumented op */
6160                 case 0x39: /* fxchg7 sti, undocumented op */
6161                     gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
6162                     break;
6163                 case 0x0a: /* grp d9/2 */
6164                     switch (rm) {
6165                     case 0: /* fnop */
6166                         /* check exceptions (FreeBSD FPU probe) */
6167                         gen_helper_fwait(cpu_env);
6168                         update_fip = false;
6169                         break;
6170                     default:
6171                         goto unknown_op;
6172                     }
6173                     break;
6174                 case 0x0c: /* grp d9/4 */
6175                     switch (rm) {
6176                     case 0: /* fchs */
6177                         gen_helper_fchs_ST0(cpu_env);
6178                         break;
6179                     case 1: /* fabs */
6180                         gen_helper_fabs_ST0(cpu_env);
6181                         break;
6182                     case 4: /* ftst */
6183                         gen_helper_fldz_FT0(cpu_env);
6184                         gen_helper_fcom_ST0_FT0(cpu_env);
6185                         break;
6186                     case 5: /* fxam */
6187                         gen_helper_fxam_ST0(cpu_env);
6188                         break;
6189                     default:
6190                         goto unknown_op;
6191                     }
6192                     break;
6193                 case 0x0d: /* grp d9/5 */
6194                     {
6195                         switch (rm) {
6196                         case 0:
6197                             gen_helper_fpush(cpu_env);
6198                             gen_helper_fld1_ST0(cpu_env);
6199                             break;
6200                         case 1:
6201                             gen_helper_fpush(cpu_env);
6202                             gen_helper_fldl2t_ST0(cpu_env);
6203                             break;
6204                         case 2:
6205                             gen_helper_fpush(cpu_env);
6206                             gen_helper_fldl2e_ST0(cpu_env);
6207                             break;
6208                         case 3:
6209                             gen_helper_fpush(cpu_env);
6210                             gen_helper_fldpi_ST0(cpu_env);
6211                             break;
6212                         case 4:
6213                             gen_helper_fpush(cpu_env);
6214                             gen_helper_fldlg2_ST0(cpu_env);
6215                             break;
6216                         case 5:
6217                             gen_helper_fpush(cpu_env);
6218                             gen_helper_fldln2_ST0(cpu_env);
6219                             break;
6220                         case 6:
6221                             gen_helper_fpush(cpu_env);
6222                             gen_helper_fldz_ST0(cpu_env);
6223                             break;
6224                         default:
6225                             goto unknown_op;
6226                         }
6227                     }
6228                     break;
6229                 case 0x0e: /* grp d9/6 */
6230                     switch (rm) {
6231                     case 0: /* f2xm1 */
6232                         gen_helper_f2xm1(cpu_env);
6233                         break;
6234                     case 1: /* fyl2x */
6235                         gen_helper_fyl2x(cpu_env);
6236                         break;
6237                     case 2: /* fptan */
6238                         gen_helper_fptan(cpu_env);
6239                         break;
6240                     case 3: /* fpatan */
6241                         gen_helper_fpatan(cpu_env);
6242                         break;
6243                     case 4: /* fxtract */
6244                         gen_helper_fxtract(cpu_env);
6245                         break;
6246                     case 5: /* fprem1 */
6247                         gen_helper_fprem1(cpu_env);
6248                         break;
6249                     case 6: /* fdecstp */
6250                         gen_helper_fdecstp(cpu_env);
6251                         break;
6252                     default:
6253                     case 7: /* fincstp */
6254                         gen_helper_fincstp(cpu_env);
6255                         break;
6256                     }
6257                     break;
6258                 case 0x0f: /* grp d9/7 */
6259                     switch (rm) {
6260                     case 0: /* fprem */
6261                         gen_helper_fprem(cpu_env);
6262                         break;
6263                     case 1: /* fyl2xp1 */
6264                         gen_helper_fyl2xp1(cpu_env);
6265                         break;
6266                     case 2: /* fsqrt */
6267                         gen_helper_fsqrt(cpu_env);
6268                         break;
6269                     case 3: /* fsincos */
6270                         gen_helper_fsincos(cpu_env);
6271                         break;
6272                     case 5: /* fscale */
6273                         gen_helper_fscale(cpu_env);
6274                         break;
6275                     case 4: /* frndint */
6276                         gen_helper_frndint(cpu_env);
6277                         break;
6278                     case 6: /* fsin */
6279                         gen_helper_fsin(cpu_env);
6280                         break;
6281                     default:
6282                     case 7: /* fcos */
6283                         gen_helper_fcos(cpu_env);
6284                         break;
6285                     }
6286                     break;
6287                 case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6288                 case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6289                 case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6290                     {
6291                         int op1;
6292 
6293                         op1 = op & 7;
6294                         if (op >= 0x20) {
6295                             gen_helper_fp_arith_STN_ST0(op1, opreg);
6296                             if (op >= 0x30) {
6297                                 gen_helper_fpop(cpu_env);
6298                             }
6299                         } else {
6300                             gen_helper_fmov_FT0_STN(cpu_env,
6301                                                     tcg_const_i32(opreg));
6302                             gen_helper_fp_arith_ST0_FT0(op1);
6303                         }
6304                     }
6305                     break;
6306                 case 0x02: /* fcom */
6307                 case 0x22: /* fcom2, undocumented op */
6308                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6309                     gen_helper_fcom_ST0_FT0(cpu_env);
6310                     break;
6311                 case 0x03: /* fcomp */
6312                 case 0x23: /* fcomp3, undocumented op */
6313                 case 0x32: /* fcomp5, undocumented op */
6314                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6315                     gen_helper_fcom_ST0_FT0(cpu_env);
6316                     gen_helper_fpop(cpu_env);
6317                     break;
6318                 case 0x15: /* da/5 */
6319                     switch (rm) {
6320                     case 1: /* fucompp */
6321                         gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6322                         gen_helper_fucom_ST0_FT0(cpu_env);
6323                         gen_helper_fpop(cpu_env);
6324                         gen_helper_fpop(cpu_env);
6325                         break;
6326                     default:
6327                         goto unknown_op;
6328                     }
6329                     break;
6330                 case 0x1c:
6331                     switch (rm) {
6332                     case 0: /* feni (287 only, just do nop here) */
6333                         break;
6334                     case 1: /* fdisi (287 only, just do nop here) */
6335                         break;
6336                     case 2: /* fclex */
6337                         gen_helper_fclex(cpu_env);
6338                         update_fip = false;
6339                         break;
6340                     case 3: /* fninit */
6341                         gen_helper_fninit(cpu_env);
6342                         update_fip = false;
6343                         break;
6344                     case 4: /* fsetpm (287 only, just do nop here) */
6345                         break;
6346                     default:
6347                         goto unknown_op;
6348                     }
6349                     break;
6350                 case 0x1d: /* fucomi */
6351                     if (!(s->cpuid_features & CPUID_CMOV)) {
6352                         goto illegal_op;
6353                     }
6354                     gen_update_cc_op(s);
6355                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6356                     gen_helper_fucomi_ST0_FT0(cpu_env);
6357                     set_cc_op(s, CC_OP_EFLAGS);
6358                     break;
6359                 case 0x1e: /* fcomi */
6360                     if (!(s->cpuid_features & CPUID_CMOV)) {
6361                         goto illegal_op;
6362                     }
6363                     gen_update_cc_op(s);
6364                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6365                     gen_helper_fcomi_ST0_FT0(cpu_env);
6366                     set_cc_op(s, CC_OP_EFLAGS);
6367                     break;
6368                 case 0x28: /* ffree sti */
6369                     gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6370                     break;
6371                 case 0x2a: /* fst sti */
6372                     gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6373                     break;
6374                 case 0x2b: /* fstp sti */
6375                 case 0x0b: /* fstp1 sti, undocumented op */
6376                 case 0x3a: /* fstp8 sti, undocumented op */
6377                 case 0x3b: /* fstp9 sti, undocumented op */
6378                     gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6379                     gen_helper_fpop(cpu_env);
6380                     break;
6381                 case 0x2c: /* fucom st(i) */
6382                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6383                     gen_helper_fucom_ST0_FT0(cpu_env);
6384                     break;
6385                 case 0x2d: /* fucomp st(i) */
6386                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6387                     gen_helper_fucom_ST0_FT0(cpu_env);
6388                     gen_helper_fpop(cpu_env);
6389                     break;
6390                 case 0x33: /* de/3 */
6391                     switch (rm) {
6392                     case 1: /* fcompp */
6393                         gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6394                         gen_helper_fcom_ST0_FT0(cpu_env);
6395                         gen_helper_fpop(cpu_env);
6396                         gen_helper_fpop(cpu_env);
6397                         break;
6398                     default:
6399                         goto unknown_op;
6400                     }
6401                     break;
6402                 case 0x38: /* ffreep sti, undocumented op */
6403                     gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6404                     gen_helper_fpop(cpu_env);
6405                     break;
6406                 case 0x3c: /* df/4 */
6407                     switch (rm) {
6408                     case 0:
6409                         gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6410                         tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
6411                         gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
6412                         break;
6413                     default:
6414                         goto unknown_op;
6415                     }
6416                     break;
6417                 case 0x3d: /* fucomip */
6418                     if (!(s->cpuid_features & CPUID_CMOV)) {
6419                         goto illegal_op;
6420                     }
6421                     gen_update_cc_op(s);
6422                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6423                     gen_helper_fucomi_ST0_FT0(cpu_env);
6424                     gen_helper_fpop(cpu_env);
6425                     set_cc_op(s, CC_OP_EFLAGS);
6426                     break;
6427                 case 0x3e: /* fcomip */
6428                     if (!(s->cpuid_features & CPUID_CMOV)) {
6429                         goto illegal_op;
6430                     }
6431                     gen_update_cc_op(s);
6432                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6433                     gen_helper_fcomi_ST0_FT0(cpu_env);
6434                     gen_helper_fpop(cpu_env);
6435                     set_cc_op(s, CC_OP_EFLAGS);
6436                     break;
6437                 case 0x10 ... 0x13: /* fcmovxx */
6438                 case 0x18 ... 0x1b:
6439                     {
6440                         int op1;
6441                         TCGLabel *l1;
6442                         static const uint8_t fcmov_cc[8] = {
6443                             (JCC_B << 1),
6444                             (JCC_Z << 1),
6445                             (JCC_BE << 1),
6446                             (JCC_P << 1),
6447                         };
6448 
6449                         if (!(s->cpuid_features & CPUID_CMOV)) {
6450                             goto illegal_op;
6451                         }
6452                         op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6453                         l1 = gen_new_label();
6454                         gen_jcc1_noeob(s, op1, l1);
6455                         gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6456                         gen_set_label(l1);
6457                     }
6458                     break;
6459                 default:
6460                     goto unknown_op;
6461                 }
6462             }
6463 
6464             if (update_fip) {
6465                 tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
6466                                offsetof(CPUX86State, segs[R_CS].selector));
6467                 tcg_gen_st16_i32(s->tmp2_i32, cpu_env,
6468                                  offsetof(CPUX86State, fpcs));
6469                 tcg_gen_st_tl(tcg_constant_tl(pc_start - s->cs_base),
6470                               cpu_env, offsetof(CPUX86State, fpip));
6471             }
6472         }
6473         break;
6474         /************************/
6475         /* string ops */
6476 
6477     case 0xa4: /* movsS */
6478     case 0xa5:
6479         ot = mo_b_d(b, dflag);
6480         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6481             gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6482         } else {
6483             gen_movs(s, ot);
6484         }
6485         break;
6486 
6487     case 0xaa: /* stosS */
6488     case 0xab:
6489         ot = mo_b_d(b, dflag);
6490         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6491             gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6492         } else {
6493             gen_stos(s, ot);
6494         }
6495         break;
6496     case 0xac: /* lodsS */
6497     case 0xad:
6498         ot = mo_b_d(b, dflag);
6499         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6500             gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6501         } else {
6502             gen_lods(s, ot);
6503         }
6504         break;
6505     case 0xae: /* scasS */
6506     case 0xaf:
6507         ot = mo_b_d(b, dflag);
6508         if (prefixes & PREFIX_REPNZ) {
6509             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6510         } else if (prefixes & PREFIX_REPZ) {
6511             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6512         } else {
6513             gen_scas(s, ot);
6514         }
6515         break;
6516 
6517     case 0xa6: /* cmpsS */
6518     case 0xa7:
6519         ot = mo_b_d(b, dflag);
6520         if (prefixes & PREFIX_REPNZ) {
6521             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6522         } else if (prefixes & PREFIX_REPZ) {
6523             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6524         } else {
6525             gen_cmps(s, ot);
6526         }
6527         break;
6528     case 0x6c: /* insS */
6529     case 0x6d:
6530         ot = mo_b_d32(b, dflag);
6531         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6532         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6533         if (!gen_check_io(s, ot, s->tmp2_i32,
6534                           SVM_IOIO_TYPE_MASK | SVM_IOIO_STR_MASK)) {
6535             break;
6536         }
6537         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6538             gen_io_start();
6539         }
6540         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6541             gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6542             /* jump generated by gen_repz_ins */
6543         } else {
6544             gen_ins(s, ot);
6545             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6546                 gen_jmp(s, s->pc - s->cs_base);
6547             }
6548         }
6549         break;
6550     case 0x6e: /* outsS */
6551     case 0x6f:
6552         ot = mo_b_d32(b, dflag);
6553         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6554         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6555         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_STR_MASK)) {
6556             break;
6557         }
6558         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6559             gen_io_start();
6560         }
6561         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6562             gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6563             /* jump generated by gen_repz_outs */
6564         } else {
6565             gen_outs(s, ot);
6566             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6567                 gen_jmp(s, s->pc - s->cs_base);
6568             }
6569         }
6570         break;
6571 
6572         /************************/
6573         /* port I/O */
6574 
6575     case 0xe4:
6576     case 0xe5:
6577         ot = mo_b_d32(b, dflag);
6578         val = x86_ldub_code(env, s);
6579         tcg_gen_movi_i32(s->tmp2_i32, val);
6580         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6581             break;
6582         }
6583         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6584             gen_io_start();
6585         }
6586         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6587         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6588         gen_bpt_io(s, s->tmp2_i32, ot);
6589         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6590             gen_jmp(s, s->pc - s->cs_base);
6591         }
6592         break;
6593     case 0xe6:
6594     case 0xe7:
6595         ot = mo_b_d32(b, dflag);
6596         val = x86_ldub_code(env, s);
6597         tcg_gen_movi_i32(s->tmp2_i32, val);
6598         if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6599             break;
6600         }
6601         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6602             gen_io_start();
6603         }
6604         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6605         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6606         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6607         gen_bpt_io(s, s->tmp2_i32, ot);
6608         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6609             gen_jmp(s, s->pc - s->cs_base);
6610         }
6611         break;
6612     case 0xec:
6613     case 0xed:
6614         ot = mo_b_d32(b, dflag);
6615         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6616         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6617         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6618             break;
6619         }
6620         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6621             gen_io_start();
6622         }
6623         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6624         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6625         gen_bpt_io(s, s->tmp2_i32, ot);
6626         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6627             gen_jmp(s, s->pc - s->cs_base);
6628         }
6629         break;
6630     case 0xee:
6631     case 0xef:
6632         ot = mo_b_d32(b, dflag);
6633         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6634         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6635         if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6636             break;
6637         }
6638         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6639             gen_io_start();
6640         }
6641         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6642         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6643         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6644         gen_bpt_io(s, s->tmp2_i32, ot);
6645         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6646             gen_jmp(s, s->pc - s->cs_base);
6647         }
6648         break;
6649 
6650         /************************/
6651         /* control */
6652     case 0xc2: /* ret im */
6653         val = x86_ldsw_code(env, s);
6654         ot = gen_pop_T0(s);
6655         gen_stack_update(s, val + (1 << ot));
6656         /* Note that gen_pop_T0 uses a zero-extending load.  */
6657         gen_op_jmp_v(s->T0);
6658         gen_bnd_jmp(s);
6659         gen_jr(s, s->T0);
6660         break;
6661     case 0xc3: /* ret */
6662         ot = gen_pop_T0(s);
6663         gen_pop_update(s, ot);
6664         /* Note that gen_pop_T0 uses a zero-extending load.  */
6665         gen_op_jmp_v(s->T0);
6666         gen_bnd_jmp(s);
6667         gen_jr(s, s->T0);
6668         break;
6669     case 0xca: /* lret im */
6670         val = x86_ldsw_code(env, s);
6671     do_lret:
6672         if (PE(s) && !VM86(s)) {
6673             gen_update_cc_op(s);
6674             gen_jmp_im(s, pc_start - s->cs_base);
6675             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6676                                       tcg_const_i32(val));
6677         } else {
6678             gen_stack_A0(s);
6679             /* pop offset */
6680             gen_op_ld_v(s, dflag, s->T0, s->A0);
6681             /* NOTE: keeping EIP updated is not a problem in case of
6682                exception */
6683             gen_op_jmp_v(s->T0);
6684             /* pop selector */
6685             gen_add_A0_im(s, 1 << dflag);
6686             gen_op_ld_v(s, dflag, s->T0, s->A0);
6687             gen_op_movl_seg_T0_vm(s, R_CS);
6688             /* add stack offset */
6689             gen_stack_update(s, val + (2 << dflag));
6690         }
6691         gen_eob(s);
6692         break;
6693     case 0xcb: /* lret */
6694         val = 0;
6695         goto do_lret;
6696     case 0xcf: /* iret */
6697         gen_svm_check_intercept(s, SVM_EXIT_IRET);
6698         if (!PE(s) || VM86(s)) {
6699             /* real mode or vm86 mode */
6700             if (!check_vm86_iopl(s)) {
6701                 break;
6702             }
6703             gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6704         } else {
6705             gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6706                                       tcg_const_i32(s->pc - s->cs_base));
6707         }
6708         set_cc_op(s, CC_OP_EFLAGS);
6709         gen_eob(s);
6710         break;
6711     case 0xe8: /* call im */
6712         {
6713             if (dflag != MO_16) {
6714                 tval = (int32_t)insn_get(env, s, MO_32);
6715             } else {
6716                 tval = (int16_t)insn_get(env, s, MO_16);
6717             }
6718             next_eip = s->pc - s->cs_base;
6719             tval += next_eip;
6720             if (dflag == MO_16) {
6721                 tval &= 0xffff;
6722             } else if (!CODE64(s)) {
6723                 tval &= 0xffffffff;
6724             }
6725             tcg_gen_movi_tl(s->T0, next_eip);
6726             gen_push_v(s, s->T0);
6727             gen_bnd_jmp(s);
6728             gen_jmp(s, tval);
6729         }
6730         break;
6731     case 0x9a: /* lcall im */
6732         {
6733             unsigned int selector, offset;
6734 
6735             if (CODE64(s))
6736                 goto illegal_op;
6737             ot = dflag;
6738             offset = insn_get(env, s, ot);
6739             selector = insn_get(env, s, MO_16);
6740 
6741             tcg_gen_movi_tl(s->T0, selector);
6742             tcg_gen_movi_tl(s->T1, offset);
6743         }
6744         goto do_lcall;
6745     case 0xe9: /* jmp im */
6746         if (dflag != MO_16) {
6747             tval = (int32_t)insn_get(env, s, MO_32);
6748         } else {
6749             tval = (int16_t)insn_get(env, s, MO_16);
6750         }
6751         tval += s->pc - s->cs_base;
6752         if (dflag == MO_16) {
6753             tval &= 0xffff;
6754         } else if (!CODE64(s)) {
6755             tval &= 0xffffffff;
6756         }
6757         gen_bnd_jmp(s);
6758         gen_jmp(s, tval);
6759         break;
6760     case 0xea: /* ljmp im */
6761         {
6762             unsigned int selector, offset;
6763 
6764             if (CODE64(s))
6765                 goto illegal_op;
6766             ot = dflag;
6767             offset = insn_get(env, s, ot);
6768             selector = insn_get(env, s, MO_16);
6769 
6770             tcg_gen_movi_tl(s->T0, selector);
6771             tcg_gen_movi_tl(s->T1, offset);
6772         }
6773         goto do_ljmp;
6774     case 0xeb: /* jmp Jb */
6775         tval = (int8_t)insn_get(env, s, MO_8);
6776         tval += s->pc - s->cs_base;
6777         if (dflag == MO_16) {
6778             tval &= 0xffff;
6779         }
6780         gen_jmp(s, tval);
6781         break;
6782     case 0x70 ... 0x7f: /* jcc Jb */
6783         tval = (int8_t)insn_get(env, s, MO_8);
6784         goto do_jcc;
6785     case 0x180 ... 0x18f: /* jcc Jv */
6786         if (dflag != MO_16) {
6787             tval = (int32_t)insn_get(env, s, MO_32);
6788         } else {
6789             tval = (int16_t)insn_get(env, s, MO_16);
6790         }
6791     do_jcc:
6792         next_eip = s->pc - s->cs_base;
6793         tval += next_eip;
6794         if (dflag == MO_16) {
6795             tval &= 0xffff;
6796         }
6797         gen_bnd_jmp(s);
6798         gen_jcc(s, b, tval, next_eip);
6799         break;
6800 
6801     case 0x190 ... 0x19f: /* setcc Gv */
6802         modrm = x86_ldub_code(env, s);
6803         gen_setcc1(s, b, s->T0);
6804         gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6805         break;
6806     case 0x140 ... 0x14f: /* cmov Gv, Ev */
6807         if (!(s->cpuid_features & CPUID_CMOV)) {
6808             goto illegal_op;
6809         }
6810         ot = dflag;
6811         modrm = x86_ldub_code(env, s);
6812         reg = ((modrm >> 3) & 7) | REX_R(s);
6813         gen_cmovcc1(env, s, ot, b, modrm, reg);
6814         break;
6815 
6816         /************************/
6817         /* flags */
6818     case 0x9c: /* pushf */
6819         gen_svm_check_intercept(s, SVM_EXIT_PUSHF);
6820         if (check_vm86_iopl(s)) {
6821             gen_update_cc_op(s);
6822             gen_helper_read_eflags(s->T0, cpu_env);
6823             gen_push_v(s, s->T0);
6824         }
6825         break;
6826     case 0x9d: /* popf */
6827         gen_svm_check_intercept(s, SVM_EXIT_POPF);
6828         if (check_vm86_iopl(s)) {
6829             ot = gen_pop_T0(s);
6830             if (CPL(s) == 0) {
6831                 if (dflag != MO_16) {
6832                     gen_helper_write_eflags(cpu_env, s->T0,
6833                                             tcg_const_i32((TF_MASK | AC_MASK |
6834                                                            ID_MASK | NT_MASK |
6835                                                            IF_MASK |
6836                                                            IOPL_MASK)));
6837                 } else {
6838                     gen_helper_write_eflags(cpu_env, s->T0,
6839                                             tcg_const_i32((TF_MASK | AC_MASK |
6840                                                            ID_MASK | NT_MASK |
6841                                                            IF_MASK | IOPL_MASK)
6842                                                           & 0xffff));
6843                 }
6844             } else {
6845                 if (CPL(s) <= IOPL(s)) {
6846                     if (dflag != MO_16) {
6847                         gen_helper_write_eflags(cpu_env, s->T0,
6848                                                 tcg_const_i32((TF_MASK |
6849                                                                AC_MASK |
6850                                                                ID_MASK |
6851                                                                NT_MASK |
6852                                                                IF_MASK)));
6853                     } else {
6854                         gen_helper_write_eflags(cpu_env, s->T0,
6855                                                 tcg_const_i32((TF_MASK |
6856                                                                AC_MASK |
6857                                                                ID_MASK |
6858                                                                NT_MASK |
6859                                                                IF_MASK)
6860                                                               & 0xffff));
6861                     }
6862                 } else {
6863                     if (dflag != MO_16) {
6864                         gen_helper_write_eflags(cpu_env, s->T0,
6865                                            tcg_const_i32((TF_MASK | AC_MASK |
6866                                                           ID_MASK | NT_MASK)));
6867                     } else {
6868                         gen_helper_write_eflags(cpu_env, s->T0,
6869                                            tcg_const_i32((TF_MASK | AC_MASK |
6870                                                           ID_MASK | NT_MASK)
6871                                                          & 0xffff));
6872                     }
6873                 }
6874             }
6875             gen_pop_update(s, ot);
6876             set_cc_op(s, CC_OP_EFLAGS);
6877             /* abort translation because TF/AC flag may change */
6878             gen_jmp_im(s, s->pc - s->cs_base);
6879             gen_eob(s);
6880         }
6881         break;
6882     case 0x9e: /* sahf */
6883         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6884             goto illegal_op;
6885         gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
6886         gen_compute_eflags(s);
6887         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6888         tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6889         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
6890         break;
6891     case 0x9f: /* lahf */
6892         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6893             goto illegal_op;
6894         gen_compute_eflags(s);
6895         /* Note: gen_compute_eflags() only gives the condition codes */
6896         tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
6897         gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
6898         break;
6899     case 0xf5: /* cmc */
6900         gen_compute_eflags(s);
6901         tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6902         break;
6903     case 0xf8: /* clc */
6904         gen_compute_eflags(s);
6905         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6906         break;
6907     case 0xf9: /* stc */
6908         gen_compute_eflags(s);
6909         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6910         break;
6911     case 0xfc: /* cld */
6912         tcg_gen_movi_i32(s->tmp2_i32, 1);
6913         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6914         break;
6915     case 0xfd: /* std */
6916         tcg_gen_movi_i32(s->tmp2_i32, -1);
6917         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6918         break;
6919 
6920         /************************/
6921         /* bit operations */
6922     case 0x1ba: /* bt/bts/btr/btc Gv, im */
6923         ot = dflag;
6924         modrm = x86_ldub_code(env, s);
6925         op = (modrm >> 3) & 7;
6926         mod = (modrm >> 6) & 3;
6927         rm = (modrm & 7) | REX_B(s);
6928         if (mod != 3) {
6929             s->rip_offset = 1;
6930             gen_lea_modrm(env, s, modrm);
6931             if (!(s->prefix & PREFIX_LOCK)) {
6932                 gen_op_ld_v(s, ot, s->T0, s->A0);
6933             }
6934         } else {
6935             gen_op_mov_v_reg(s, ot, s->T0, rm);
6936         }
6937         /* load shift */
6938         val = x86_ldub_code(env, s);
6939         tcg_gen_movi_tl(s->T1, val);
6940         if (op < 4)
6941             goto unknown_op;
6942         op -= 4;
6943         goto bt_op;
6944     case 0x1a3: /* bt Gv, Ev */
6945         op = 0;
6946         goto do_btx;
6947     case 0x1ab: /* bts */
6948         op = 1;
6949         goto do_btx;
6950     case 0x1b3: /* btr */
6951         op = 2;
6952         goto do_btx;
6953     case 0x1bb: /* btc */
6954         op = 3;
6955     do_btx:
6956         ot = dflag;
6957         modrm = x86_ldub_code(env, s);
6958         reg = ((modrm >> 3) & 7) | REX_R(s);
6959         mod = (modrm >> 6) & 3;
6960         rm = (modrm & 7) | REX_B(s);
6961         gen_op_mov_v_reg(s, MO_32, s->T1, reg);
6962         if (mod != 3) {
6963             AddressParts a = gen_lea_modrm_0(env, s, modrm);
6964             /* specific case: we need to add a displacement */
6965             gen_exts(ot, s->T1);
6966             tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
6967             tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
6968             tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
6969             gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
6970             if (!(s->prefix & PREFIX_LOCK)) {
6971                 gen_op_ld_v(s, ot, s->T0, s->A0);
6972             }
6973         } else {
6974             gen_op_mov_v_reg(s, ot, s->T0, rm);
6975         }
6976     bt_op:
6977         tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
6978         tcg_gen_movi_tl(s->tmp0, 1);
6979         tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
6980         if (s->prefix & PREFIX_LOCK) {
6981             switch (op) {
6982             case 0: /* bt */
6983                 /* Needs no atomic ops; we suppressed the normal
6984                    memory load for LOCK above so do it now.  */
6985                 gen_op_ld_v(s, ot, s->T0, s->A0);
6986                 break;
6987             case 1: /* bts */
6988                 tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
6989                                            s->mem_index, ot | MO_LE);
6990                 break;
6991             case 2: /* btr */
6992                 tcg_gen_not_tl(s->tmp0, s->tmp0);
6993                 tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
6994                                             s->mem_index, ot | MO_LE);
6995                 break;
6996             default:
6997             case 3: /* btc */
6998                 tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
6999                                             s->mem_index, ot | MO_LE);
7000                 break;
7001             }
7002             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
7003         } else {
7004             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
7005             switch (op) {
7006             case 0: /* bt */
7007                 /* Data already loaded; nothing to do.  */
7008                 break;
7009             case 1: /* bts */
7010                 tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
7011                 break;
7012             case 2: /* btr */
7013                 tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
7014                 break;
7015             default:
7016             case 3: /* btc */
7017                 tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
7018                 break;
7019             }
7020             if (op != 0) {
7021                 if (mod != 3) {
7022                     gen_op_st_v(s, ot, s->T0, s->A0);
7023                 } else {
7024                     gen_op_mov_reg_v(s, ot, rm, s->T0);
7025                 }
7026             }
7027         }
7028 
7029         /* Delay all CC updates until after the store above.  Note that
7030            C is the result of the test, Z is unchanged, and the others
7031            are all undefined.  */
7032         switch (s->cc_op) {
7033         case CC_OP_MULB ... CC_OP_MULQ:
7034         case CC_OP_ADDB ... CC_OP_ADDQ:
7035         case CC_OP_ADCB ... CC_OP_ADCQ:
7036         case CC_OP_SUBB ... CC_OP_SUBQ:
7037         case CC_OP_SBBB ... CC_OP_SBBQ:
7038         case CC_OP_LOGICB ... CC_OP_LOGICQ:
7039         case CC_OP_INCB ... CC_OP_INCQ:
7040         case CC_OP_DECB ... CC_OP_DECQ:
7041         case CC_OP_SHLB ... CC_OP_SHLQ:
7042         case CC_OP_SARB ... CC_OP_SARQ:
7043         case CC_OP_BMILGB ... CC_OP_BMILGQ:
7044             /* Z was going to be computed from the non-zero status of CC_DST.
7045                We can get that same Z value (and the new C value) by leaving
7046                CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
7047                same width.  */
7048             tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
7049             set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
7050             break;
7051         default:
7052             /* Otherwise, generate EFLAGS and replace the C bit.  */
7053             gen_compute_eflags(s);
7054             tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
7055                                ctz32(CC_C), 1);
7056             break;
7057         }
7058         break;
7059     case 0x1bc: /* bsf / tzcnt */
7060     case 0x1bd: /* bsr / lzcnt */
7061         ot = dflag;
7062         modrm = x86_ldub_code(env, s);
7063         reg = ((modrm >> 3) & 7) | REX_R(s);
7064         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
7065         gen_extu(ot, s->T0);
7066 
7067         /* Note that lzcnt and tzcnt are in different extensions.  */
7068         if ((prefixes & PREFIX_REPZ)
7069             && (b & 1
7070                 ? s->cpuid_ext3_features & CPUID_EXT3_ABM
7071                 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
7072             int size = 8 << ot;
7073             /* For lzcnt/tzcnt, C bit is defined related to the input. */
7074             tcg_gen_mov_tl(cpu_cc_src, s->T0);
7075             if (b & 1) {
7076                 /* For lzcnt, reduce the target_ulong result by the
7077                    number of zeros that we expect to find at the top.  */
7078                 tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
7079                 tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
7080             } else {
7081                 /* For tzcnt, a zero input must return the operand size.  */
7082                 tcg_gen_ctzi_tl(s->T0, s->T0, size);
7083             }
7084             /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
7085             gen_op_update1_cc(s);
7086             set_cc_op(s, CC_OP_BMILGB + ot);
7087         } else {
7088             /* For bsr/bsf, only the Z bit is defined and it is related
7089                to the input and not the result.  */
7090             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
7091             set_cc_op(s, CC_OP_LOGICB + ot);
7092 
7093             /* ??? The manual says that the output is undefined when the
7094                input is zero, but real hardware leaves it unchanged, and
7095                real programs appear to depend on that.  Accomplish this
7096                by passing the output as the value to return upon zero.  */
7097             if (b & 1) {
7098                 /* For bsr, return the bit index of the first 1 bit,
7099                    not the count of leading zeros.  */
7100                 tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
7101                 tcg_gen_clz_tl(s->T0, s->T0, s->T1);
7102                 tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
7103             } else {
7104                 tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
7105             }
7106         }
7107         gen_op_mov_reg_v(s, ot, reg, s->T0);
7108         break;
7109         /************************/
7110         /* bcd */
7111     case 0x27: /* daa */
7112         if (CODE64(s))
7113             goto illegal_op;
7114         gen_update_cc_op(s);
7115         gen_helper_daa(cpu_env);
7116         set_cc_op(s, CC_OP_EFLAGS);
7117         break;
7118     case 0x2f: /* das */
7119         if (CODE64(s))
7120             goto illegal_op;
7121         gen_update_cc_op(s);
7122         gen_helper_das(cpu_env);
7123         set_cc_op(s, CC_OP_EFLAGS);
7124         break;
7125     case 0x37: /* aaa */
7126         if (CODE64(s))
7127             goto illegal_op;
7128         gen_update_cc_op(s);
7129         gen_helper_aaa(cpu_env);
7130         set_cc_op(s, CC_OP_EFLAGS);
7131         break;
7132     case 0x3f: /* aas */
7133         if (CODE64(s))
7134             goto illegal_op;
7135         gen_update_cc_op(s);
7136         gen_helper_aas(cpu_env);
7137         set_cc_op(s, CC_OP_EFLAGS);
7138         break;
7139     case 0xd4: /* aam */
7140         if (CODE64(s))
7141             goto illegal_op;
7142         val = x86_ldub_code(env, s);
7143         if (val == 0) {
7144             gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
7145         } else {
7146             gen_helper_aam(cpu_env, tcg_const_i32(val));
7147             set_cc_op(s, CC_OP_LOGICB);
7148         }
7149         break;
7150     case 0xd5: /* aad */
7151         if (CODE64(s))
7152             goto illegal_op;
7153         val = x86_ldub_code(env, s);
7154         gen_helper_aad(cpu_env, tcg_const_i32(val));
7155         set_cc_op(s, CC_OP_LOGICB);
7156         break;
7157         /************************/
7158         /* misc */
7159     case 0x90: /* nop */
7160         /* XXX: correct lock test for all insn */
7161         if (prefixes & PREFIX_LOCK) {
7162             goto illegal_op;
7163         }
7164         /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
7165         if (REX_B(s)) {
7166             goto do_xchg_reg_eax;
7167         }
7168         if (prefixes & PREFIX_REPZ) {
7169             gen_update_cc_op(s);
7170             gen_jmp_im(s, pc_start - s->cs_base);
7171             gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
7172             s->base.is_jmp = DISAS_NORETURN;
7173         }
7174         break;
7175     case 0x9b: /* fwait */
7176         if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
7177             (HF_MP_MASK | HF_TS_MASK)) {
7178             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7179         } else {
7180             gen_helper_fwait(cpu_env);
7181         }
7182         break;
7183     case 0xcc: /* int3 */
7184         gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
7185         break;
7186     case 0xcd: /* int N */
7187         val = x86_ldub_code(env, s);
7188         if (check_vm86_iopl(s)) {
7189             gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
7190         }
7191         break;
7192     case 0xce: /* into */
7193         if (CODE64(s))
7194             goto illegal_op;
7195         gen_update_cc_op(s);
7196         gen_jmp_im(s, pc_start - s->cs_base);
7197         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7198         break;
7199 #ifdef WANT_ICEBP
7200     case 0xf1: /* icebp (undocumented, exits to external debugger) */
7201         gen_svm_check_intercept(s, SVM_EXIT_ICEBP);
7202         gen_debug(s);
7203         break;
7204 #endif
7205     case 0xfa: /* cli */
7206         if (check_iopl(s)) {
7207             gen_helper_cli(cpu_env);
7208         }
7209         break;
7210     case 0xfb: /* sti */
7211         if (check_iopl(s)) {
7212             gen_helper_sti(cpu_env);
7213             /* interrupts are enabled only after the first insn following sti */
7214             gen_jmp_im(s, s->pc - s->cs_base);
7215             gen_eob_inhibit_irq(s, true);
7216         }
7217         break;
7218     case 0x62: /* bound */
7219         if (CODE64(s))
7220             goto illegal_op;
7221         ot = dflag;
7222         modrm = x86_ldub_code(env, s);
7223         reg = (modrm >> 3) & 7;
7224         mod = (modrm >> 6) & 3;
7225         if (mod == 3)
7226             goto illegal_op;
7227         gen_op_mov_v_reg(s, ot, s->T0, reg);
7228         gen_lea_modrm(env, s, modrm);
7229         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7230         if (ot == MO_16) {
7231             gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
7232         } else {
7233             gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
7234         }
7235         break;
7236     case 0x1c8 ... 0x1cf: /* bswap reg */
7237         reg = (b & 7) | REX_B(s);
7238 #ifdef TARGET_X86_64
7239         if (dflag == MO_64) {
7240             tcg_gen_bswap64_i64(cpu_regs[reg], cpu_regs[reg]);
7241             break;
7242         }
7243 #endif
7244         tcg_gen_bswap32_tl(cpu_regs[reg], cpu_regs[reg], TCG_BSWAP_OZ);
7245         break;
7246     case 0xd6: /* salc */
7247         if (CODE64(s))
7248             goto illegal_op;
7249         gen_compute_eflags_c(s, s->T0);
7250         tcg_gen_neg_tl(s->T0, s->T0);
7251         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
7252         break;
7253     case 0xe0: /* loopnz */
7254     case 0xe1: /* loopz */
7255     case 0xe2: /* loop */
7256     case 0xe3: /* jecxz */
7257         {
7258             TCGLabel *l1, *l2, *l3;
7259 
7260             tval = (int8_t)insn_get(env, s, MO_8);
7261             next_eip = s->pc - s->cs_base;
7262             tval += next_eip;
7263             if (dflag == MO_16) {
7264                 tval &= 0xffff;
7265             }
7266 
7267             l1 = gen_new_label();
7268             l2 = gen_new_label();
7269             l3 = gen_new_label();
7270             gen_update_cc_op(s);
7271             b &= 3;
7272             switch(b) {
7273             case 0: /* loopnz */
7274             case 1: /* loopz */
7275                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7276                 gen_op_jz_ecx(s, s->aflag, l3);
7277                 gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7278                 break;
7279             case 2: /* loop */
7280                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7281                 gen_op_jnz_ecx(s, s->aflag, l1);
7282                 break;
7283             default:
7284             case 3: /* jcxz */
7285                 gen_op_jz_ecx(s, s->aflag, l1);
7286                 break;
7287             }
7288 
7289             gen_set_label(l3);
7290             gen_jmp_im(s, next_eip);
7291             tcg_gen_br(l2);
7292 
7293             gen_set_label(l1);
7294             gen_jmp_im(s, tval);
7295             gen_set_label(l2);
7296             gen_eob(s);
7297         }
7298         break;
7299     case 0x130: /* wrmsr */
7300     case 0x132: /* rdmsr */
7301         if (check_cpl0(s)) {
7302             gen_update_cc_op(s);
7303             gen_jmp_im(s, pc_start - s->cs_base);
7304             if (b & 2) {
7305                 gen_helper_rdmsr(cpu_env);
7306             } else {
7307                 gen_helper_wrmsr(cpu_env);
7308                 gen_jmp_im(s, s->pc - s->cs_base);
7309                 gen_eob(s);
7310             }
7311         }
7312         break;
7313     case 0x131: /* rdtsc */
7314         gen_update_cc_op(s);
7315         gen_jmp_im(s, pc_start - s->cs_base);
7316         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7317             gen_io_start();
7318         }
7319         gen_helper_rdtsc(cpu_env);
7320         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7321             gen_jmp(s, s->pc - s->cs_base);
7322         }
7323         break;
7324     case 0x133: /* rdpmc */
7325         gen_update_cc_op(s);
7326         gen_jmp_im(s, pc_start - s->cs_base);
7327         gen_helper_rdpmc(cpu_env);
7328         s->base.is_jmp = DISAS_NORETURN;
7329         break;
7330     case 0x134: /* sysenter */
7331         /* For Intel SYSENTER is valid on 64-bit */
7332         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7333             goto illegal_op;
7334         if (!PE(s)) {
7335             gen_exception_gpf(s);
7336         } else {
7337             gen_helper_sysenter(cpu_env);
7338             gen_eob(s);
7339         }
7340         break;
7341     case 0x135: /* sysexit */
7342         /* For Intel SYSEXIT is valid on 64-bit */
7343         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7344             goto illegal_op;
7345         if (!PE(s)) {
7346             gen_exception_gpf(s);
7347         } else {
7348             gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7349             gen_eob(s);
7350         }
7351         break;
7352 #ifdef TARGET_X86_64
7353     case 0x105: /* syscall */
7354         /* XXX: is it usable in real mode ? */
7355         gen_update_cc_op(s);
7356         gen_jmp_im(s, pc_start - s->cs_base);
7357         gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7358         /* TF handling for the syscall insn is different. The TF bit is  checked
7359            after the syscall insn completes. This allows #DB to not be
7360            generated after one has entered CPL0 if TF is set in FMASK.  */
7361         gen_eob_worker(s, false, true);
7362         break;
7363     case 0x107: /* sysret */
7364         if (!PE(s)) {
7365             gen_exception_gpf(s);
7366         } else {
7367             gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7368             /* condition codes are modified only in long mode */
7369             if (LMA(s)) {
7370                 set_cc_op(s, CC_OP_EFLAGS);
7371             }
7372             /* TF handling for the sysret insn is different. The TF bit is
7373                checked after the sysret insn completes. This allows #DB to be
7374                generated "as if" the syscall insn in userspace has just
7375                completed.  */
7376             gen_eob_worker(s, false, true);
7377         }
7378         break;
7379 #endif
7380     case 0x1a2: /* cpuid */
7381         gen_update_cc_op(s);
7382         gen_jmp_im(s, pc_start - s->cs_base);
7383         gen_helper_cpuid(cpu_env);
7384         break;
7385     case 0xf4: /* hlt */
7386         if (check_cpl0(s)) {
7387             gen_update_cc_op(s);
7388             gen_jmp_im(s, pc_start - s->cs_base);
7389             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7390             s->base.is_jmp = DISAS_NORETURN;
7391         }
7392         break;
7393     case 0x100:
7394         modrm = x86_ldub_code(env, s);
7395         mod = (modrm >> 6) & 3;
7396         op = (modrm >> 3) & 7;
7397         switch(op) {
7398         case 0: /* sldt */
7399             if (!PE(s) || VM86(s))
7400                 goto illegal_op;
7401             gen_svm_check_intercept(s, SVM_EXIT_LDTR_READ);
7402             tcg_gen_ld32u_tl(s->T0, cpu_env,
7403                              offsetof(CPUX86State, ldt.selector));
7404             ot = mod == 3 ? dflag : MO_16;
7405             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7406             break;
7407         case 2: /* lldt */
7408             if (!PE(s) || VM86(s))
7409                 goto illegal_op;
7410             if (check_cpl0(s)) {
7411                 gen_svm_check_intercept(s, SVM_EXIT_LDTR_WRITE);
7412                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7413                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7414                 gen_helper_lldt(cpu_env, s->tmp2_i32);
7415             }
7416             break;
7417         case 1: /* str */
7418             if (!PE(s) || VM86(s))
7419                 goto illegal_op;
7420             gen_svm_check_intercept(s, SVM_EXIT_TR_READ);
7421             tcg_gen_ld32u_tl(s->T0, cpu_env,
7422                              offsetof(CPUX86State, tr.selector));
7423             ot = mod == 3 ? dflag : MO_16;
7424             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7425             break;
7426         case 3: /* ltr */
7427             if (!PE(s) || VM86(s))
7428                 goto illegal_op;
7429             if (check_cpl0(s)) {
7430                 gen_svm_check_intercept(s, SVM_EXIT_TR_WRITE);
7431                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7432                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7433                 gen_helper_ltr(cpu_env, s->tmp2_i32);
7434             }
7435             break;
7436         case 4: /* verr */
7437         case 5: /* verw */
7438             if (!PE(s) || VM86(s))
7439                 goto illegal_op;
7440             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7441             gen_update_cc_op(s);
7442             if (op == 4) {
7443                 gen_helper_verr(cpu_env, s->T0);
7444             } else {
7445                 gen_helper_verw(cpu_env, s->T0);
7446             }
7447             set_cc_op(s, CC_OP_EFLAGS);
7448             break;
7449         default:
7450             goto unknown_op;
7451         }
7452         break;
7453 
7454     case 0x101:
7455         modrm = x86_ldub_code(env, s);
7456         switch (modrm) {
7457         CASE_MODRM_MEM_OP(0): /* sgdt */
7458             gen_svm_check_intercept(s, SVM_EXIT_GDTR_READ);
7459             gen_lea_modrm(env, s, modrm);
7460             tcg_gen_ld32u_tl(s->T0,
7461                              cpu_env, offsetof(CPUX86State, gdt.limit));
7462             gen_op_st_v(s, MO_16, s->T0, s->A0);
7463             gen_add_A0_im(s, 2);
7464             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7465             if (dflag == MO_16) {
7466                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7467             }
7468             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7469             break;
7470 
7471         case 0xc8: /* monitor */
7472             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7473                 goto illegal_op;
7474             }
7475             gen_update_cc_op(s);
7476             gen_jmp_im(s, pc_start - s->cs_base);
7477             tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7478             gen_extu(s->aflag, s->A0);
7479             gen_add_A0_ds_seg(s);
7480             gen_helper_monitor(cpu_env, s->A0);
7481             break;
7482 
7483         case 0xc9: /* mwait */
7484             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7485                 goto illegal_op;
7486             }
7487             gen_update_cc_op(s);
7488             gen_jmp_im(s, pc_start - s->cs_base);
7489             gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7490             s->base.is_jmp = DISAS_NORETURN;
7491             break;
7492 
7493         case 0xca: /* clac */
7494             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7495                 || CPL(s) != 0) {
7496                 goto illegal_op;
7497             }
7498             gen_helper_clac(cpu_env);
7499             gen_jmp_im(s, s->pc - s->cs_base);
7500             gen_eob(s);
7501             break;
7502 
7503         case 0xcb: /* stac */
7504             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7505                 || CPL(s) != 0) {
7506                 goto illegal_op;
7507             }
7508             gen_helper_stac(cpu_env);
7509             gen_jmp_im(s, s->pc - s->cs_base);
7510             gen_eob(s);
7511             break;
7512 
7513         CASE_MODRM_MEM_OP(1): /* sidt */
7514             gen_svm_check_intercept(s, SVM_EXIT_IDTR_READ);
7515             gen_lea_modrm(env, s, modrm);
7516             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
7517             gen_op_st_v(s, MO_16, s->T0, s->A0);
7518             gen_add_A0_im(s, 2);
7519             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7520             if (dflag == MO_16) {
7521                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7522             }
7523             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7524             break;
7525 
7526         case 0xd0: /* xgetbv */
7527             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7528                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7529                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7530                 goto illegal_op;
7531             }
7532             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7533             gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
7534             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7535             break;
7536 
7537         case 0xd1: /* xsetbv */
7538             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7539                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7540                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7541                 goto illegal_op;
7542             }
7543             if (!check_cpl0(s)) {
7544                 break;
7545             }
7546             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7547                                   cpu_regs[R_EDX]);
7548             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7549             gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
7550             /* End TB because translation flags may change.  */
7551             gen_jmp_im(s, s->pc - s->cs_base);
7552             gen_eob(s);
7553             break;
7554 
7555         case 0xd8: /* VMRUN */
7556             if (!SVME(s) || !PE(s)) {
7557                 goto illegal_op;
7558             }
7559             if (!check_cpl0(s)) {
7560                 break;
7561             }
7562             gen_update_cc_op(s);
7563             gen_jmp_im(s, pc_start - s->cs_base);
7564             gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7565                              tcg_const_i32(s->pc - pc_start));
7566             tcg_gen_exit_tb(NULL, 0);
7567             s->base.is_jmp = DISAS_NORETURN;
7568             break;
7569 
7570         case 0xd9: /* VMMCALL */
7571             if (!SVME(s)) {
7572                 goto illegal_op;
7573             }
7574             gen_update_cc_op(s);
7575             gen_jmp_im(s, pc_start - s->cs_base);
7576             gen_helper_vmmcall(cpu_env);
7577             break;
7578 
7579         case 0xda: /* VMLOAD */
7580             if (!SVME(s) || !PE(s)) {
7581                 goto illegal_op;
7582             }
7583             if (!check_cpl0(s)) {
7584                 break;
7585             }
7586             gen_update_cc_op(s);
7587             gen_jmp_im(s, pc_start - s->cs_base);
7588             gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7589             break;
7590 
7591         case 0xdb: /* VMSAVE */
7592             if (!SVME(s) || !PE(s)) {
7593                 goto illegal_op;
7594             }
7595             if (!check_cpl0(s)) {
7596                 break;
7597             }
7598             gen_update_cc_op(s);
7599             gen_jmp_im(s, pc_start - s->cs_base);
7600             gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7601             break;
7602 
7603         case 0xdc: /* STGI */
7604             if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7605                 || !PE(s)) {
7606                 goto illegal_op;
7607             }
7608             if (!check_cpl0(s)) {
7609                 break;
7610             }
7611             gen_update_cc_op(s);
7612             gen_helper_stgi(cpu_env);
7613             gen_jmp_im(s, s->pc - s->cs_base);
7614             gen_eob(s);
7615             break;
7616 
7617         case 0xdd: /* CLGI */
7618             if (!SVME(s) || !PE(s)) {
7619                 goto illegal_op;
7620             }
7621             if (!check_cpl0(s)) {
7622                 break;
7623             }
7624             gen_update_cc_op(s);
7625             gen_jmp_im(s, pc_start - s->cs_base);
7626             gen_helper_clgi(cpu_env);
7627             break;
7628 
7629         case 0xde: /* SKINIT */
7630             if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7631                 || !PE(s)) {
7632                 goto illegal_op;
7633             }
7634             gen_svm_check_intercept(s, SVM_EXIT_SKINIT);
7635             /* If not intercepted, not implemented -- raise #UD. */
7636             goto illegal_op;
7637 
7638         case 0xdf: /* INVLPGA */
7639             if (!SVME(s) || !PE(s)) {
7640                 goto illegal_op;
7641             }
7642             if (!check_cpl0(s)) {
7643                 break;
7644             }
7645             gen_svm_check_intercept(s, SVM_EXIT_INVLPGA);
7646             if (s->aflag == MO_64) {
7647                 tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7648             } else {
7649                 tcg_gen_ext32u_tl(s->A0, cpu_regs[R_EAX]);
7650             }
7651             gen_helper_flush_page(cpu_env, s->A0);
7652             gen_jmp_im(s, s->pc - s->cs_base);
7653             gen_eob(s);
7654             break;
7655 
7656         CASE_MODRM_MEM_OP(2): /* lgdt */
7657             if (!check_cpl0(s)) {
7658                 break;
7659             }
7660             gen_svm_check_intercept(s, SVM_EXIT_GDTR_WRITE);
7661             gen_lea_modrm(env, s, modrm);
7662             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7663             gen_add_A0_im(s, 2);
7664             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7665             if (dflag == MO_16) {
7666                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7667             }
7668             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7669             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7670             break;
7671 
7672         CASE_MODRM_MEM_OP(3): /* lidt */
7673             if (!check_cpl0(s)) {
7674                 break;
7675             }
7676             gen_svm_check_intercept(s, SVM_EXIT_IDTR_WRITE);
7677             gen_lea_modrm(env, s, modrm);
7678             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7679             gen_add_A0_im(s, 2);
7680             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7681             if (dflag == MO_16) {
7682                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7683             }
7684             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7685             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
7686             break;
7687 
7688         CASE_MODRM_OP(4): /* smsw */
7689             gen_svm_check_intercept(s, SVM_EXIT_READ_CR0);
7690             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
7691             /*
7692              * In 32-bit mode, the higher 16 bits of the destination
7693              * register are undefined.  In practice CR0[31:0] is stored
7694              * just like in 64-bit mode.
7695              */
7696             mod = (modrm >> 6) & 3;
7697             ot = (mod != 3 ? MO_16 : s->dflag);
7698             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7699             break;
7700         case 0xee: /* rdpkru */
7701             if (prefixes & PREFIX_LOCK) {
7702                 goto illegal_op;
7703             }
7704             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7705             gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
7706             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7707             break;
7708         case 0xef: /* wrpkru */
7709             if (prefixes & PREFIX_LOCK) {
7710                 goto illegal_op;
7711             }
7712             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7713                                   cpu_regs[R_EDX]);
7714             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7715             gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
7716             break;
7717 
7718         CASE_MODRM_OP(6): /* lmsw */
7719             if (!check_cpl0(s)) {
7720                 break;
7721             }
7722             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
7723             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7724             /*
7725              * Only the 4 lower bits of CR0 are modified.
7726              * PE cannot be set to zero if already set to one.
7727              */
7728             tcg_gen_ld_tl(s->T1, cpu_env, offsetof(CPUX86State, cr[0]));
7729             tcg_gen_andi_tl(s->T0, s->T0, 0xf);
7730             tcg_gen_andi_tl(s->T1, s->T1, ~0xe);
7731             tcg_gen_or_tl(s->T0, s->T0, s->T1);
7732             gen_helper_write_crN(cpu_env, tcg_constant_i32(0), s->T0);
7733             gen_jmp_im(s, s->pc - s->cs_base);
7734             gen_eob(s);
7735             break;
7736 
7737         CASE_MODRM_MEM_OP(7): /* invlpg */
7738             if (!check_cpl0(s)) {
7739                 break;
7740             }
7741             gen_svm_check_intercept(s, SVM_EXIT_INVLPG);
7742             gen_lea_modrm(env, s, modrm);
7743             gen_helper_flush_page(cpu_env, s->A0);
7744             gen_jmp_im(s, s->pc - s->cs_base);
7745             gen_eob(s);
7746             break;
7747 
7748         case 0xf8: /* swapgs */
7749 #ifdef TARGET_X86_64
7750             if (CODE64(s)) {
7751                 if (check_cpl0(s)) {
7752                     tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
7753                     tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7754                                   offsetof(CPUX86State, kernelgsbase));
7755                     tcg_gen_st_tl(s->T0, cpu_env,
7756                                   offsetof(CPUX86State, kernelgsbase));
7757                 }
7758                 break;
7759             }
7760 #endif
7761             goto illegal_op;
7762 
7763         case 0xf9: /* rdtscp */
7764             if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7765                 goto illegal_op;
7766             }
7767             gen_update_cc_op(s);
7768             gen_jmp_im(s, pc_start - s->cs_base);
7769             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7770                 gen_io_start();
7771             }
7772             gen_helper_rdtscp(cpu_env);
7773             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7774                 gen_jmp(s, s->pc - s->cs_base);
7775             }
7776             break;
7777 
7778         default:
7779             goto unknown_op;
7780         }
7781         break;
7782 
7783     case 0x108: /* invd */
7784     case 0x109: /* wbinvd */
7785         if (check_cpl0(s)) {
7786             gen_svm_check_intercept(s, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7787             /* nothing to do */
7788         }
7789         break;
7790     case 0x63: /* arpl or movslS (x86_64) */
7791 #ifdef TARGET_X86_64
7792         if (CODE64(s)) {
7793             int d_ot;
7794             /* d_ot is the size of destination */
7795             d_ot = dflag;
7796 
7797             modrm = x86_ldub_code(env, s);
7798             reg = ((modrm >> 3) & 7) | REX_R(s);
7799             mod = (modrm >> 6) & 3;
7800             rm = (modrm & 7) | REX_B(s);
7801 
7802             if (mod == 3) {
7803                 gen_op_mov_v_reg(s, MO_32, s->T0, rm);
7804                 /* sign extend */
7805                 if (d_ot == MO_64) {
7806                     tcg_gen_ext32s_tl(s->T0, s->T0);
7807                 }
7808                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7809             } else {
7810                 gen_lea_modrm(env, s, modrm);
7811                 gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
7812                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7813             }
7814         } else
7815 #endif
7816         {
7817             TCGLabel *label1;
7818             TCGv t0, t1, t2, a0;
7819 
7820             if (!PE(s) || VM86(s))
7821                 goto illegal_op;
7822             t0 = tcg_temp_local_new();
7823             t1 = tcg_temp_local_new();
7824             t2 = tcg_temp_local_new();
7825             ot = MO_16;
7826             modrm = x86_ldub_code(env, s);
7827             reg = (modrm >> 3) & 7;
7828             mod = (modrm >> 6) & 3;
7829             rm = modrm & 7;
7830             if (mod != 3) {
7831                 gen_lea_modrm(env, s, modrm);
7832                 gen_op_ld_v(s, ot, t0, s->A0);
7833                 a0 = tcg_temp_local_new();
7834                 tcg_gen_mov_tl(a0, s->A0);
7835             } else {
7836                 gen_op_mov_v_reg(s, ot, t0, rm);
7837                 a0 = NULL;
7838             }
7839             gen_op_mov_v_reg(s, ot, t1, reg);
7840             tcg_gen_andi_tl(s->tmp0, t0, 3);
7841             tcg_gen_andi_tl(t1, t1, 3);
7842             tcg_gen_movi_tl(t2, 0);
7843             label1 = gen_new_label();
7844             tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
7845             tcg_gen_andi_tl(t0, t0, ~3);
7846             tcg_gen_or_tl(t0, t0, t1);
7847             tcg_gen_movi_tl(t2, CC_Z);
7848             gen_set_label(label1);
7849             if (mod != 3) {
7850                 gen_op_st_v(s, ot, t0, a0);
7851                 tcg_temp_free(a0);
7852            } else {
7853                 gen_op_mov_reg_v(s, ot, rm, t0);
7854             }
7855             gen_compute_eflags(s);
7856             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7857             tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7858             tcg_temp_free(t0);
7859             tcg_temp_free(t1);
7860             tcg_temp_free(t2);
7861         }
7862         break;
7863     case 0x102: /* lar */
7864     case 0x103: /* lsl */
7865         {
7866             TCGLabel *label1;
7867             TCGv t0;
7868             if (!PE(s) || VM86(s))
7869                 goto illegal_op;
7870             ot = dflag != MO_16 ? MO_32 : MO_16;
7871             modrm = x86_ldub_code(env, s);
7872             reg = ((modrm >> 3) & 7) | REX_R(s);
7873             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7874             t0 = tcg_temp_local_new();
7875             gen_update_cc_op(s);
7876             if (b == 0x102) {
7877                 gen_helper_lar(t0, cpu_env, s->T0);
7878             } else {
7879                 gen_helper_lsl(t0, cpu_env, s->T0);
7880             }
7881             tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
7882             label1 = gen_new_label();
7883             tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
7884             gen_op_mov_reg_v(s, ot, reg, t0);
7885             gen_set_label(label1);
7886             set_cc_op(s, CC_OP_EFLAGS);
7887             tcg_temp_free(t0);
7888         }
7889         break;
7890     case 0x118:
7891         modrm = x86_ldub_code(env, s);
7892         mod = (modrm >> 6) & 3;
7893         op = (modrm >> 3) & 7;
7894         switch(op) {
7895         case 0: /* prefetchnta */
7896         case 1: /* prefetcht0 */
7897         case 2: /* prefetcht1 */
7898         case 3: /* prefetcht2 */
7899             if (mod == 3)
7900                 goto illegal_op;
7901             gen_nop_modrm(env, s, modrm);
7902             /* nothing more to do */
7903             break;
7904         default: /* nop (multi byte) */
7905             gen_nop_modrm(env, s, modrm);
7906             break;
7907         }
7908         break;
7909     case 0x11a:
7910         modrm = x86_ldub_code(env, s);
7911         if (s->flags & HF_MPX_EN_MASK) {
7912             mod = (modrm >> 6) & 3;
7913             reg = ((modrm >> 3) & 7) | REX_R(s);
7914             if (prefixes & PREFIX_REPZ) {
7915                 /* bndcl */
7916                 if (reg >= 4
7917                     || (prefixes & PREFIX_LOCK)
7918                     || s->aflag == MO_16) {
7919                     goto illegal_op;
7920                 }
7921                 gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7922             } else if (prefixes & PREFIX_REPNZ) {
7923                 /* bndcu */
7924                 if (reg >= 4
7925                     || (prefixes & PREFIX_LOCK)
7926                     || s->aflag == MO_16) {
7927                     goto illegal_op;
7928                 }
7929                 TCGv_i64 notu = tcg_temp_new_i64();
7930                 tcg_gen_not_i64(notu, cpu_bndu[reg]);
7931                 gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7932                 tcg_temp_free_i64(notu);
7933             } else if (prefixes & PREFIX_DATA) {
7934                 /* bndmov -- from reg/mem */
7935                 if (reg >= 4 || s->aflag == MO_16) {
7936                     goto illegal_op;
7937                 }
7938                 if (mod == 3) {
7939                     int reg2 = (modrm & 7) | REX_B(s);
7940                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7941                         goto illegal_op;
7942                     }
7943                     if (s->flags & HF_MPX_IU_MASK) {
7944                         tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7945                         tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7946                     }
7947                 } else {
7948                     gen_lea_modrm(env, s, modrm);
7949                     if (CODE64(s)) {
7950                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7951                                             s->mem_index, MO_LEQ);
7952                         tcg_gen_addi_tl(s->A0, s->A0, 8);
7953                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7954                                             s->mem_index, MO_LEQ);
7955                     } else {
7956                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7957                                             s->mem_index, MO_LEUL);
7958                         tcg_gen_addi_tl(s->A0, s->A0, 4);
7959                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7960                                             s->mem_index, MO_LEUL);
7961                     }
7962                     /* bnd registers are now in-use */
7963                     gen_set_hflag(s, HF_MPX_IU_MASK);
7964                 }
7965             } else if (mod != 3) {
7966                 /* bndldx */
7967                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7968                 if (reg >= 4
7969                     || (prefixes & PREFIX_LOCK)
7970                     || s->aflag == MO_16
7971                     || a.base < -1) {
7972                     goto illegal_op;
7973                 }
7974                 if (a.base >= 0) {
7975                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7976                 } else {
7977                     tcg_gen_movi_tl(s->A0, 0);
7978                 }
7979                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7980                 if (a.index >= 0) {
7981                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7982                 } else {
7983                     tcg_gen_movi_tl(s->T0, 0);
7984                 }
7985                 if (CODE64(s)) {
7986                     gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
7987                     tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7988                                    offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7989                 } else {
7990                     gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
7991                     tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7992                     tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7993                 }
7994                 gen_set_hflag(s, HF_MPX_IU_MASK);
7995             }
7996         }
7997         gen_nop_modrm(env, s, modrm);
7998         break;
7999     case 0x11b:
8000         modrm = x86_ldub_code(env, s);
8001         if (s->flags & HF_MPX_EN_MASK) {
8002             mod = (modrm >> 6) & 3;
8003             reg = ((modrm >> 3) & 7) | REX_R(s);
8004             if (mod != 3 && (prefixes & PREFIX_REPZ)) {
8005                 /* bndmk */
8006                 if (reg >= 4
8007                     || (prefixes & PREFIX_LOCK)
8008                     || s->aflag == MO_16) {
8009                     goto illegal_op;
8010                 }
8011                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
8012                 if (a.base >= 0) {
8013                     tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
8014                     if (!CODE64(s)) {
8015                         tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
8016                     }
8017                 } else if (a.base == -1) {
8018                     /* no base register has lower bound of 0 */
8019                     tcg_gen_movi_i64(cpu_bndl[reg], 0);
8020                 } else {
8021                     /* rip-relative generates #ud */
8022                     goto illegal_op;
8023                 }
8024                 tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
8025                 if (!CODE64(s)) {
8026                     tcg_gen_ext32u_tl(s->A0, s->A0);
8027                 }
8028                 tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
8029                 /* bnd registers are now in-use */
8030                 gen_set_hflag(s, HF_MPX_IU_MASK);
8031                 break;
8032             } else if (prefixes & PREFIX_REPNZ) {
8033                 /* bndcn */
8034                 if (reg >= 4
8035                     || (prefixes & PREFIX_LOCK)
8036                     || s->aflag == MO_16) {
8037                     goto illegal_op;
8038                 }
8039                 gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
8040             } else if (prefixes & PREFIX_DATA) {
8041                 /* bndmov -- to reg/mem */
8042                 if (reg >= 4 || s->aflag == MO_16) {
8043                     goto illegal_op;
8044                 }
8045                 if (mod == 3) {
8046                     int reg2 = (modrm & 7) | REX_B(s);
8047                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
8048                         goto illegal_op;
8049                     }
8050                     if (s->flags & HF_MPX_IU_MASK) {
8051                         tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
8052                         tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
8053                     }
8054                 } else {
8055                     gen_lea_modrm(env, s, modrm);
8056                     if (CODE64(s)) {
8057                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8058                                             s->mem_index, MO_LEQ);
8059                         tcg_gen_addi_tl(s->A0, s->A0, 8);
8060                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8061                                             s->mem_index, MO_LEQ);
8062                     } else {
8063                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8064                                             s->mem_index, MO_LEUL);
8065                         tcg_gen_addi_tl(s->A0, s->A0, 4);
8066                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8067                                             s->mem_index, MO_LEUL);
8068                     }
8069                 }
8070             } else if (mod != 3) {
8071                 /* bndstx */
8072                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
8073                 if (reg >= 4
8074                     || (prefixes & PREFIX_LOCK)
8075                     || s->aflag == MO_16
8076                     || a.base < -1) {
8077                     goto illegal_op;
8078                 }
8079                 if (a.base >= 0) {
8080                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
8081                 } else {
8082                     tcg_gen_movi_tl(s->A0, 0);
8083                 }
8084                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
8085                 if (a.index >= 0) {
8086                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
8087                 } else {
8088                     tcg_gen_movi_tl(s->T0, 0);
8089                 }
8090                 if (CODE64(s)) {
8091                     gen_helper_bndstx64(cpu_env, s->A0, s->T0,
8092                                         cpu_bndl[reg], cpu_bndu[reg]);
8093                 } else {
8094                     gen_helper_bndstx32(cpu_env, s->A0, s->T0,
8095                                         cpu_bndl[reg], cpu_bndu[reg]);
8096                 }
8097             }
8098         }
8099         gen_nop_modrm(env, s, modrm);
8100         break;
8101     case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
8102         modrm = x86_ldub_code(env, s);
8103         gen_nop_modrm(env, s, modrm);
8104         break;
8105 
8106     case 0x120: /* mov reg, crN */
8107     case 0x122: /* mov crN, reg */
8108         if (!check_cpl0(s)) {
8109             break;
8110         }
8111         modrm = x86_ldub_code(env, s);
8112         /*
8113          * Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8114          * AMD documentation (24594.pdf) and testing of Intel 386 and 486
8115          * processors all show that the mod bits are assumed to be 1's,
8116          * regardless of actual values.
8117          */
8118         rm = (modrm & 7) | REX_B(s);
8119         reg = ((modrm >> 3) & 7) | REX_R(s);
8120         switch (reg) {
8121         case 0:
8122             if ((prefixes & PREFIX_LOCK) &&
8123                 (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
8124                 reg = 8;
8125             }
8126             break;
8127         case 2:
8128         case 3:
8129         case 4:
8130         case 8:
8131             break;
8132         default:
8133             goto unknown_op;
8134         }
8135         ot  = (CODE64(s) ? MO_64 : MO_32);
8136 
8137         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8138             gen_io_start();
8139         }
8140         if (b & 2) {
8141             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0 + reg);
8142             gen_op_mov_v_reg(s, ot, s->T0, rm);
8143             gen_helper_write_crN(cpu_env, tcg_constant_i32(reg), s->T0);
8144             gen_jmp_im(s, s->pc - s->cs_base);
8145             gen_eob(s);
8146         } else {
8147             gen_svm_check_intercept(s, SVM_EXIT_READ_CR0 + reg);
8148             gen_helper_read_crN(s->T0, cpu_env, tcg_constant_i32(reg));
8149             gen_op_mov_reg_v(s, ot, rm, s->T0);
8150             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8151                 gen_jmp(s, s->pc - s->cs_base);
8152             }
8153         }
8154         break;
8155 
8156     case 0x121: /* mov reg, drN */
8157     case 0x123: /* mov drN, reg */
8158         if (check_cpl0(s)) {
8159             modrm = x86_ldub_code(env, s);
8160             /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8161              * AMD documentation (24594.pdf) and testing of
8162              * intel 386 and 486 processors all show that the mod bits
8163              * are assumed to be 1's, regardless of actual values.
8164              */
8165             rm = (modrm & 7) | REX_B(s);
8166             reg = ((modrm >> 3) & 7) | REX_R(s);
8167             if (CODE64(s))
8168                 ot = MO_64;
8169             else
8170                 ot = MO_32;
8171             if (reg >= 8) {
8172                 goto illegal_op;
8173             }
8174             if (b & 2) {
8175                 gen_svm_check_intercept(s, SVM_EXIT_WRITE_DR0 + reg);
8176                 gen_op_mov_v_reg(s, ot, s->T0, rm);
8177                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8178                 gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
8179                 gen_jmp_im(s, s->pc - s->cs_base);
8180                 gen_eob(s);
8181             } else {
8182                 gen_svm_check_intercept(s, SVM_EXIT_READ_DR0 + reg);
8183                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8184                 gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
8185                 gen_op_mov_reg_v(s, ot, rm, s->T0);
8186             }
8187         }
8188         break;
8189     case 0x106: /* clts */
8190         if (check_cpl0(s)) {
8191             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
8192             gen_helper_clts(cpu_env);
8193             /* abort block because static cpu state changed */
8194             gen_jmp_im(s, s->pc - s->cs_base);
8195             gen_eob(s);
8196         }
8197         break;
8198     /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8199     case 0x1c3: /* MOVNTI reg, mem */
8200         if (!(s->cpuid_features & CPUID_SSE2))
8201             goto illegal_op;
8202         ot = mo_64_32(dflag);
8203         modrm = x86_ldub_code(env, s);
8204         mod = (modrm >> 6) & 3;
8205         if (mod == 3)
8206             goto illegal_op;
8207         reg = ((modrm >> 3) & 7) | REX_R(s);
8208         /* generate a generic store */
8209         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8210         break;
8211     case 0x1ae:
8212         modrm = x86_ldub_code(env, s);
8213         switch (modrm) {
8214         CASE_MODRM_MEM_OP(0): /* fxsave */
8215             if (!(s->cpuid_features & CPUID_FXSR)
8216                 || (prefixes & PREFIX_LOCK)) {
8217                 goto illegal_op;
8218             }
8219             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8220                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8221                 break;
8222             }
8223             gen_lea_modrm(env, s, modrm);
8224             gen_helper_fxsave(cpu_env, s->A0);
8225             break;
8226 
8227         CASE_MODRM_MEM_OP(1): /* fxrstor */
8228             if (!(s->cpuid_features & CPUID_FXSR)
8229                 || (prefixes & PREFIX_LOCK)) {
8230                 goto illegal_op;
8231             }
8232             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8233                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8234                 break;
8235             }
8236             gen_lea_modrm(env, s, modrm);
8237             gen_helper_fxrstor(cpu_env, s->A0);
8238             break;
8239 
8240         CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8241             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8242                 goto illegal_op;
8243             }
8244             if (s->flags & HF_TS_MASK) {
8245                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8246                 break;
8247             }
8248             gen_lea_modrm(env, s, modrm);
8249             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8250             gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8251             break;
8252 
8253         CASE_MODRM_MEM_OP(3): /* stmxcsr */
8254             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8255                 goto illegal_op;
8256             }
8257             if (s->flags & HF_TS_MASK) {
8258                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8259                 break;
8260             }
8261             gen_helper_update_mxcsr(cpu_env);
8262             gen_lea_modrm(env, s, modrm);
8263             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8264             gen_op_st_v(s, MO_32, s->T0, s->A0);
8265             break;
8266 
8267         CASE_MODRM_MEM_OP(4): /* xsave */
8268             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8269                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8270                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8271                 goto illegal_op;
8272             }
8273             gen_lea_modrm(env, s, modrm);
8274             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8275                                   cpu_regs[R_EDX]);
8276             gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8277             break;
8278 
8279         CASE_MODRM_MEM_OP(5): /* xrstor */
8280             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8281                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8282                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8283                 goto illegal_op;
8284             }
8285             gen_lea_modrm(env, s, modrm);
8286             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8287                                   cpu_regs[R_EDX]);
8288             gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8289             /* XRSTOR is how MPX is enabled, which changes how
8290                we translate.  Thus we need to end the TB.  */
8291             gen_update_cc_op(s);
8292             gen_jmp_im(s, s->pc - s->cs_base);
8293             gen_eob(s);
8294             break;
8295 
8296         CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8297             if (prefixes & PREFIX_LOCK) {
8298                 goto illegal_op;
8299             }
8300             if (prefixes & PREFIX_DATA) {
8301                 /* clwb */
8302                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8303                     goto illegal_op;
8304                 }
8305                 gen_nop_modrm(env, s, modrm);
8306             } else {
8307                 /* xsaveopt */
8308                 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8309                     || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8310                     || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8311                     goto illegal_op;
8312                 }
8313                 gen_lea_modrm(env, s, modrm);
8314                 tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8315                                       cpu_regs[R_EDX]);
8316                 gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8317             }
8318             break;
8319 
8320         CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8321             if (prefixes & PREFIX_LOCK) {
8322                 goto illegal_op;
8323             }
8324             if (prefixes & PREFIX_DATA) {
8325                 /* clflushopt */
8326                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8327                     goto illegal_op;
8328                 }
8329             } else {
8330                 /* clflush */
8331                 if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8332                     || !(s->cpuid_features & CPUID_CLFLUSH)) {
8333                     goto illegal_op;
8334                 }
8335             }
8336             gen_nop_modrm(env, s, modrm);
8337             break;
8338 
8339         case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8340         case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8341         case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8342         case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8343             if (CODE64(s)
8344                 && (prefixes & PREFIX_REPZ)
8345                 && !(prefixes & PREFIX_LOCK)
8346                 && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8347                 TCGv base, treg, src, dst;
8348 
8349                 /* Preserve hflags bits by testing CR4 at runtime.  */
8350                 tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8351                 gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
8352 
8353                 base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8354                 treg = cpu_regs[(modrm & 7) | REX_B(s)];
8355 
8356                 if (modrm & 0x10) {
8357                     /* wr*base */
8358                     dst = base, src = treg;
8359                 } else {
8360                     /* rd*base */
8361                     dst = treg, src = base;
8362                 }
8363 
8364                 if (s->dflag == MO_32) {
8365                     tcg_gen_ext32u_tl(dst, src);
8366                 } else {
8367                     tcg_gen_mov_tl(dst, src);
8368                 }
8369                 break;
8370             }
8371             goto unknown_op;
8372 
8373         case 0xf8: /* sfence / pcommit */
8374             if (prefixes & PREFIX_DATA) {
8375                 /* pcommit */
8376                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8377                     || (prefixes & PREFIX_LOCK)) {
8378                     goto illegal_op;
8379                 }
8380                 break;
8381             }
8382             /* fallthru */
8383         case 0xf9 ... 0xff: /* sfence */
8384             if (!(s->cpuid_features & CPUID_SSE)
8385                 || (prefixes & PREFIX_LOCK)) {
8386                 goto illegal_op;
8387             }
8388             tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8389             break;
8390         case 0xe8 ... 0xef: /* lfence */
8391             if (!(s->cpuid_features & CPUID_SSE)
8392                 || (prefixes & PREFIX_LOCK)) {
8393                 goto illegal_op;
8394             }
8395             tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8396             break;
8397         case 0xf0 ... 0xf7: /* mfence */
8398             if (!(s->cpuid_features & CPUID_SSE2)
8399                 || (prefixes & PREFIX_LOCK)) {
8400                 goto illegal_op;
8401             }
8402             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8403             break;
8404 
8405         default:
8406             goto unknown_op;
8407         }
8408         break;
8409 
8410     case 0x10d: /* 3DNow! prefetch(w) */
8411         modrm = x86_ldub_code(env, s);
8412         mod = (modrm >> 6) & 3;
8413         if (mod == 3)
8414             goto illegal_op;
8415         gen_nop_modrm(env, s, modrm);
8416         break;
8417     case 0x1aa: /* rsm */
8418         gen_svm_check_intercept(s, SVM_EXIT_RSM);
8419         if (!(s->flags & HF_SMM_MASK))
8420             goto illegal_op;
8421 #ifdef CONFIG_USER_ONLY
8422         /* we should not be in SMM mode */
8423         g_assert_not_reached();
8424 #else
8425         gen_update_cc_op(s);
8426         gen_jmp_im(s, s->pc - s->cs_base);
8427         gen_helper_rsm(cpu_env);
8428 #endif /* CONFIG_USER_ONLY */
8429         gen_eob(s);
8430         break;
8431     case 0x1b8: /* SSE4.2 popcnt */
8432         if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8433              PREFIX_REPZ)
8434             goto illegal_op;
8435         if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8436             goto illegal_op;
8437 
8438         modrm = x86_ldub_code(env, s);
8439         reg = ((modrm >> 3) & 7) | REX_R(s);
8440 
8441         if (s->prefix & PREFIX_DATA) {
8442             ot = MO_16;
8443         } else {
8444             ot = mo_64_32(dflag);
8445         }
8446 
8447         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8448         gen_extu(ot, s->T0);
8449         tcg_gen_mov_tl(cpu_cc_src, s->T0);
8450         tcg_gen_ctpop_tl(s->T0, s->T0);
8451         gen_op_mov_reg_v(s, ot, reg, s->T0);
8452 
8453         set_cc_op(s, CC_OP_POPCNT);
8454         break;
8455     case 0x10e ... 0x10f:
8456         /* 3DNow! instructions, ignore prefixes */
8457         s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8458         /* fall through */
8459     case 0x110 ... 0x117:
8460     case 0x128 ... 0x12f:
8461     case 0x138 ... 0x13a:
8462     case 0x150 ... 0x179:
8463     case 0x17c ... 0x17f:
8464     case 0x1c2:
8465     case 0x1c4 ... 0x1c6:
8466     case 0x1d0 ... 0x1fe:
8467         gen_sse(env, s, b, pc_start);
8468         break;
8469     default:
8470         goto unknown_op;
8471     }
8472     return s->pc;
8473  illegal_op:
8474     gen_illegal_opcode(s);
8475     return s->pc;
8476  unknown_op:
8477     gen_unknown_opcode(env, s);
8478     return s->pc;
8479 }
8480 
8481 void tcg_x86_init(void)
8482 {
8483     static const char reg_names[CPU_NB_REGS][4] = {
8484 #ifdef TARGET_X86_64
8485         [R_EAX] = "rax",
8486         [R_EBX] = "rbx",
8487         [R_ECX] = "rcx",
8488         [R_EDX] = "rdx",
8489         [R_ESI] = "rsi",
8490         [R_EDI] = "rdi",
8491         [R_EBP] = "rbp",
8492         [R_ESP] = "rsp",
8493         [8]  = "r8",
8494         [9]  = "r9",
8495         [10] = "r10",
8496         [11] = "r11",
8497         [12] = "r12",
8498         [13] = "r13",
8499         [14] = "r14",
8500         [15] = "r15",
8501 #else
8502         [R_EAX] = "eax",
8503         [R_EBX] = "ebx",
8504         [R_ECX] = "ecx",
8505         [R_EDX] = "edx",
8506         [R_ESI] = "esi",
8507         [R_EDI] = "edi",
8508         [R_EBP] = "ebp",
8509         [R_ESP] = "esp",
8510 #endif
8511     };
8512     static const char seg_base_names[6][8] = {
8513         [R_CS] = "cs_base",
8514         [R_DS] = "ds_base",
8515         [R_ES] = "es_base",
8516         [R_FS] = "fs_base",
8517         [R_GS] = "gs_base",
8518         [R_SS] = "ss_base",
8519     };
8520     static const char bnd_regl_names[4][8] = {
8521         "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8522     };
8523     static const char bnd_regu_names[4][8] = {
8524         "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8525     };
8526     int i;
8527 
8528     cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8529                                        offsetof(CPUX86State, cc_op), "cc_op");
8530     cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8531                                     "cc_dst");
8532     cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8533                                     "cc_src");
8534     cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8535                                      "cc_src2");
8536 
8537     for (i = 0; i < CPU_NB_REGS; ++i) {
8538         cpu_regs[i] = tcg_global_mem_new(cpu_env,
8539                                          offsetof(CPUX86State, regs[i]),
8540                                          reg_names[i]);
8541     }
8542 
8543     for (i = 0; i < 6; ++i) {
8544         cpu_seg_base[i]
8545             = tcg_global_mem_new(cpu_env,
8546                                  offsetof(CPUX86State, segs[i].base),
8547                                  seg_base_names[i]);
8548     }
8549 
8550     for (i = 0; i < 4; ++i) {
8551         cpu_bndl[i]
8552             = tcg_global_mem_new_i64(cpu_env,
8553                                      offsetof(CPUX86State, bnd_regs[i].lb),
8554                                      bnd_regl_names[i]);
8555         cpu_bndu[i]
8556             = tcg_global_mem_new_i64(cpu_env,
8557                                      offsetof(CPUX86State, bnd_regs[i].ub),
8558                                      bnd_regu_names[i]);
8559     }
8560 }
8561 
8562 static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
8563 {
8564     DisasContext *dc = container_of(dcbase, DisasContext, base);
8565     CPUX86State *env = cpu->env_ptr;
8566     uint32_t flags = dc->base.tb->flags;
8567     int cpl = (flags >> HF_CPL_SHIFT) & 3;
8568     int iopl = (flags >> IOPL_SHIFT) & 3;
8569 
8570     dc->cs_base = dc->base.tb->cs_base;
8571     dc->flags = flags;
8572 #ifndef CONFIG_USER_ONLY
8573     dc->cpl = cpl;
8574     dc->iopl = iopl;
8575 #endif
8576 
8577     /* We make some simplifying assumptions; validate they're correct. */
8578     g_assert(PE(dc) == ((flags & HF_PE_MASK) != 0));
8579     g_assert(CPL(dc) == cpl);
8580     g_assert(IOPL(dc) == iopl);
8581     g_assert(VM86(dc) == ((flags & HF_VM_MASK) != 0));
8582     g_assert(CODE32(dc) == ((flags & HF_CS32_MASK) != 0));
8583     g_assert(CODE64(dc) == ((flags & HF_CS64_MASK) != 0));
8584     g_assert(SS32(dc) == ((flags & HF_SS32_MASK) != 0));
8585     g_assert(LMA(dc) == ((flags & HF_LMA_MASK) != 0));
8586     g_assert(ADDSEG(dc) == ((flags & HF_ADDSEG_MASK) != 0));
8587     g_assert(SVME(dc) == ((flags & HF_SVME_MASK) != 0));
8588     g_assert(GUEST(dc) == ((flags & HF_GUEST_MASK) != 0));
8589 
8590     dc->cc_op = CC_OP_DYNAMIC;
8591     dc->cc_op_dirty = false;
8592     dc->popl_esp_hack = 0;
8593     /* select memory access functions */
8594     dc->mem_index = 0;
8595 #ifdef CONFIG_SOFTMMU
8596     dc->mem_index = cpu_mmu_index(env, false);
8597 #endif
8598     dc->cpuid_features = env->features[FEAT_1_EDX];
8599     dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8600     dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8601     dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8602     dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8603     dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8604     dc->jmp_opt = !(dc->base.singlestep_enabled ||
8605                     (flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)));
8606     /*
8607      * If jmp_opt, we want to handle each string instruction individually.
8608      * For icount also disable repz optimization so that each iteration
8609      * is accounted separately.
8610      */
8611     dc->repz_opt = !dc->jmp_opt && !(tb_cflags(dc->base.tb) & CF_USE_ICOUNT);
8612 
8613     dc->T0 = tcg_temp_new();
8614     dc->T1 = tcg_temp_new();
8615     dc->A0 = tcg_temp_new();
8616 
8617     dc->tmp0 = tcg_temp_new();
8618     dc->tmp1_i64 = tcg_temp_new_i64();
8619     dc->tmp2_i32 = tcg_temp_new_i32();
8620     dc->tmp3_i32 = tcg_temp_new_i32();
8621     dc->tmp4 = tcg_temp_new();
8622     dc->ptr0 = tcg_temp_new_ptr();
8623     dc->ptr1 = tcg_temp_new_ptr();
8624     dc->cc_srcT = tcg_temp_local_new();
8625 }
8626 
8627 static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
8628 {
8629 }
8630 
8631 static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
8632 {
8633     DisasContext *dc = container_of(dcbase, DisasContext, base);
8634 
8635     tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
8636 }
8637 
8638 static bool i386_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
8639                                      const CPUBreakpoint *bp)
8640 {
8641     DisasContext *dc = container_of(dcbase, DisasContext, base);
8642     /* If RF is set, suppress an internally generated breakpoint.  */
8643     int flags = dc->base.tb->flags & HF_RF_MASK ? BP_GDB : BP_ANY;
8644     if (bp->flags & flags) {
8645         gen_debug(dc);
8646         /* The address covered by the breakpoint must be included in
8647            [tb->pc, tb->pc + tb->size) in order to for it to be
8648            properly cleared -- thus we increment the PC here so that
8649            the generic logic setting tb->size later does the right thing.  */
8650         dc->base.pc_next += 1;
8651         return true;
8652     } else {
8653         return false;
8654     }
8655 }
8656 
8657 static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8658 {
8659     DisasContext *dc = container_of(dcbase, DisasContext, base);
8660     target_ulong pc_next;
8661 
8662 #ifdef TARGET_VSYSCALL_PAGE
8663     /*
8664      * Detect entry into the vsyscall page and invoke the syscall.
8665      */
8666     if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) {
8667         gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next);
8668         dc->base.pc_next = dc->pc + 1;
8669         return;
8670     }
8671 #endif
8672 
8673     pc_next = disas_insn(dc, cpu);
8674 
8675     if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
8676         /* if single step mode, we generate only one instruction and
8677            generate an exception */
8678         /* if irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear
8679            the flag and abort the translation to give the irqs a
8680            chance to happen */
8681         dc->base.is_jmp = DISAS_TOO_MANY;
8682     } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
8683                && ((pc_next & TARGET_PAGE_MASK)
8684                    != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
8685                        & TARGET_PAGE_MASK)
8686                    || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
8687         /* Do not cross the boundary of the pages in icount mode,
8688            it can cause an exception. Do it only when boundary is
8689            crossed by the first instruction in the block.
8690            If current instruction already crossed the bound - it's ok,
8691            because an exception hasn't stopped this code.
8692          */
8693         dc->base.is_jmp = DISAS_TOO_MANY;
8694     } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
8695         dc->base.is_jmp = DISAS_TOO_MANY;
8696     }
8697 
8698     dc->base.pc_next = pc_next;
8699 }
8700 
8701 static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8702 {
8703     DisasContext *dc = container_of(dcbase, DisasContext, base);
8704 
8705     if (dc->base.is_jmp == DISAS_TOO_MANY) {
8706         gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
8707         gen_eob(dc);
8708     }
8709 }
8710 
8711 static void i386_tr_disas_log(const DisasContextBase *dcbase,
8712                               CPUState *cpu)
8713 {
8714     DisasContext *dc = container_of(dcbase, DisasContext, base);
8715 
8716     qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
8717     log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
8718 }
8719 
8720 static const TranslatorOps i386_tr_ops = {
8721     .init_disas_context = i386_tr_init_disas_context,
8722     .tb_start           = i386_tr_tb_start,
8723     .insn_start         = i386_tr_insn_start,
8724     .breakpoint_check   = i386_tr_breakpoint_check,
8725     .translate_insn     = i386_tr_translate_insn,
8726     .tb_stop            = i386_tr_tb_stop,
8727     .disas_log          = i386_tr_disas_log,
8728 };
8729 
8730 /* generate intermediate code for basic block 'tb'.  */
8731 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
8732 {
8733     DisasContext dc;
8734 
8735     translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
8736 }
8737 
8738 void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8739                           target_ulong *data)
8740 {
8741     int cc_op = data[1];
8742     env->eip = data[0] - tb->cs_base;
8743     if (cc_op != CC_OP_DYNAMIC) {
8744         env->cc_op = cc_op;
8745     }
8746 }
8747