xref: /qemu/target/i386/tcg/translate.c (revision d7a84021)
1 /*
2  *  i386 translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 
21 #include "qemu/host-utils.h"
22 #include "cpu.h"
23 #include "disas/disas.h"
24 #include "exec/exec-all.h"
25 #include "tcg/tcg-op.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/translator.h"
28 
29 #include "exec/helper-proto.h"
30 #include "exec/helper-gen.h"
31 #include "helper-tcg.h"
32 
33 #include "trace-tcg.h"
34 #include "exec/log.h"
35 
/* Instruction prefix bits, OR-ed together into DisasContext.prefix.  */
#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20
42 
/* Accessors for 64-bit state; compile to constant 0 on 32-bit-only
   targets so the long-mode code paths fold away.  */
#ifdef TARGET_X86_64
#define CODE64(s) ((s)->code64)
#define REX_X(s) ((s)->rex_x)
#define REX_B(s) ((s)->rex_b)
#else
#define CODE64(s) 0
#define REX_X(s) 0
#define REX_B(s) 0
#endif
52 
/* Count-trailing/leading-zeros on a target_ulong-sized value.  */
#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif
60 
/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

/* As above, but also match the register operands (mod == 3).  */
#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7
72 
73 //#define MACRO_TEST   1
74 
/* global register indexes */
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;  /* lazy flag operands */
static TCGv_i32 cpu_cc_op;                        /* current CC_OP value */
static TCGv cpu_regs[CPU_NB_REGS];                /* general registers */
static TCGv cpu_seg_base[6];                      /* segment base addresses */
static TCGv_i64 cpu_bndl[4];                      /* MPX bound lower */
static TCGv_i64 cpu_bndu[4];                      /* MPX bound upper */
82 
83 #include "exec/gen-icount.h"
84 
/* Per-translation-block decoder state for the i386 front end.  */
typedef struct DisasContext {
    DisasContextBase base;

    /* current insn context */
    int override; /* -1 if no override */
    int prefix;   /* PREFIX_* bits seen so far on the current insn */
    MemOp aflag;  /* effective address size */
    MemOp dflag;  /* effective operand size */
    target_ulong pc_start;  /* address of the first byte of the insn */
    target_ulong pc; /* pc = eip + cs_base */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b;  /* REX.X / REX.B prefix bits */
#endif
    int vex_l;  /* vex vector length */
    int vex_v;  /* vex vvvv register, without 1's complement.  */
    int ss32;   /* 32 bit stack segment */
    CCOp cc_op;  /* current CC operation */
    bool cc_op_dirty;  /* cc_op not yet written back to cpu_cc_op */
#ifdef TARGET_X86_64
    bool x86_64_hregs;  /* REX prefix present: 4..7 are SPL..DIL, not xH */
#endif
    int addseg; /* non zero if either DS/ES/SS have a non zero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;    /* current privilege level */
    int iopl;   /* I/O privilege level from EFLAGS */
    int tf;     /* TF cpu flag */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int repz_opt; /* optimize jumps within repz instructions */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    /* cached CPUID feature words, used to gate insn decoding */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;

    /* TCG local temps */
    TCGv cc_srcT;
    TCGv A0;
    TCGv T0;
    TCGv T1;

    /* TCG local register indexes (only used inside old micro ops) */
    TCGv tmp0;
    TCGv tmp4;
    TCGv_ptr ptr0;
    TCGv_ptr ptr1;
    TCGv_i32 tmp2_i32;
    TCGv_i32 tmp3_i32;
    TCGv_i64 tmp1_i64;

    /* longjmp target on decode overrun or similar decode failure */
    sigjmp_buf jmpbuf;
} DisasContext;
148 
149 static void gen_eob(DisasContext *s);
150 static void gen_jr(DisasContext *s, TCGv dest);
151 static void gen_jmp(DisasContext *s, target_ulong eip);
152 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
153 static void gen_op(DisasContext *s1, int op, MemOp ot, int d);
154 
/* i386 arith/logic operations, in the encoding order of the /r field.  */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};
166 
/* i386 shift ops, in the encoding order of the /r field.  */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};
178 
/* Jcc condition codes (bits 3..1 of the opcode; bit 0 inverts).  */
enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};
189 
enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};
205 
/* Bitmask of lazy-flag globals consumed by a given CC_OP value.  */
enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};
212 
/* Bit set if the global variable is live after setting CC_OP to X.
   Indexed by CCOp; consulted by set_cc_op() to discard dead globals.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
    [CC_OP_POPCNT] = USES_CC_SRC,
};
234 
/* Switch the translator's lazy flag state to OP, emitting discards for
   any flag globals that the new CC_OP no longer reads so that TCG
   liveness analysis can eliminate their dead updates.  */
static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(s->cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}
271 
272 static void gen_update_cc_op(DisasContext *s)
273 {
274     if (s->cc_op_dirty) {
275         tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
276         s->cc_op_dirty = false;
277     }
278 }
279 
280 #ifdef TARGET_X86_64
281 
282 #define NB_OP_SIZES 4
283 
284 #else /* !TARGET_X86_64 */
285 
286 #define NB_OP_SIZES 3
287 
288 #endif /* !TARGET_X86_64 */
289 
290 #if defined(HOST_WORDS_BIGENDIAN)
291 #define REG_B_OFFSET (sizeof(target_ulong) - 1)
292 #define REG_H_OFFSET (sizeof(target_ulong) - 2)
293 #define REG_W_OFFSET (sizeof(target_ulong) - 2)
294 #define REG_L_OFFSET (sizeof(target_ulong) - 4)
295 #define REG_LH_OFFSET (sizeof(target_ulong) - 8)
296 #else
297 #define REG_B_OFFSET 0
298 #define REG_H_OFFSET 1
299 #define REG_W_OFFSET 0
300 #define REG_L_OFFSET 0
301 #define REG_LH_OFFSET 4
302 #endif
303 
304 /* In instruction encodings for byte register accesses the
305  * register number usually indicates "low 8 bits of register N";
306  * however there are some special cases where N 4..7 indicates
307  * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
308  * true for this special case, false otherwise.
309  */
310 static inline bool byte_reg_is_xH(DisasContext *s, int reg)
311 {
312     if (reg < 4) {
313         return false;
314     }
315 #ifdef TARGET_X86_64
316     if (reg >= 8 || s->x86_64_hregs) {
317         return false;
318     }
319 #endif
320     return true;
321 }
322 
323 /* Select the size of a push/pop operation.  */
324 static inline MemOp mo_pushpop(DisasContext *s, MemOp ot)
325 {
326     if (CODE64(s)) {
327         return ot == MO_16 ? MO_16 : MO_64;
328     } else {
329         return ot;
330     }
331 }
332 
333 /* Select the size of the stack pointer.  */
334 static inline MemOp mo_stacksize(DisasContext *s)
335 {
336     return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
337 }
338 
339 /* Select only size 64 else 32.  Used for SSE operand sizes.  */
340 static inline MemOp mo_64_32(MemOp ot)
341 {
342 #ifdef TARGET_X86_64
343     return ot == MO_64 ? MO_64 : MO_32;
344 #else
345     return MO_32;
346 #endif
347 }
348 
349 /* Select size 8 if lsb of B is clear, else OT.  Used for decoding
350    byte vs word opcodes.  */
351 static inline MemOp mo_b_d(int b, MemOp ot)
352 {
353     return b & 1 ? ot : MO_8;
354 }
355 
356 /* Select size 8 if lsb of B is clear, else OT capped at 32.
357    Used for decoding operand size of port opcodes.  */
358 static inline MemOp mo_b_d32(int b, MemOp ot)
359 {
360     return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
361 }
362 
/* Store T0 into general register REG with size OT, preserving the
   untouched bytes of the register as the ISA requires (except MO_32,
   which zero-extends into the high half on x86_64).  */
static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0)
{
    switch(ot) {
    case MO_8:
        if (!byte_reg_is_xH(s, reg)) {
            /* Low byte (AL..R15L): deposit into bits 7..0.  */
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            /* AH/CH/DH/BH: deposit into bits 15..8 of register REG-4.  */
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}
390 
391 static inline
392 void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg)
393 {
394     if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
395         tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
396     } else {
397         tcg_gen_mov_tl(t0, cpu_regs[reg]);
398     }
399 }
400 
/* Add immediate VAL to the address in A0, truncating the result to
   32 bits outside 64-bit code.  */
static void gen_add_A0_im(DisasContext *s, int val)
{
    tcg_gen_addi_tl(s->A0, s->A0, val);
    if (!CODE64(s)) {
        tcg_gen_ext32u_tl(s->A0, s->A0);
    }
}
408 
/* Store DEST into env->eip (an indirect jump at the CPU-state level).  */
static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}
413 
/* Add immediate VAL to register REG, writing back only SIZE bytes.  */
static inline
void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}
420 
/* Add T0 to register REG, writing back only SIZE bytes.  */
static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg)
{
    tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}
426 
/* Little-endian guest load of size IDX from address A0 into T0.  */
static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}
431 
/* Little-endian guest store of size IDX of T0 to address A0.  */
static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}
436 
437 static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
438 {
439     if (d == OR_TMP0) {
440         gen_op_st_v(s, idx, s->T0, s->A0);
441     } else {
442         gen_op_mov_reg_v(s, idx, d, s->T0);
443     }
444 }
445 
/* Store the constant PC into env->eip.  */
static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
{
    tcg_gen_movi_tl(s->tmp0, pc);
    gen_op_jmp_v(s->tmp0);
}
451 
/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        /* 64-bit address: segment bases other than an explicit
           override are ignored.  */
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(s->A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && s->addseg) {
            /* A default segment with non-zero base must be added.  */
            ovr_seg = def_seg;
        }
        if (ovr_seg < 0) {
            tcg_gen_ext32u_tl(s->A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(s->A0, a0);
        a0 = s->A0;
        if (ovr_seg < 0) {
            if (s->addseg) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    /* Add the selected segment base, with the wrap semantics of the
       current address size.  */
    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(s->A0, a0, seg);
        } else if (CODE64(s)) {
            /* 32-bit address in 64-bit code: zero-extend, then add.  */
            tcg_gen_ext32u_tl(s->A0, a0);
            tcg_gen_add_tl(s->A0, s->A0, seg);
        } else {
            /* Legacy mode: add, then wrap the linear address to 32 bits.  */
            tcg_gen_add_tl(s->A0, a0, seg);
            tcg_gen_ext32u_tl(s->A0, s->A0);
        }
    }
}
507 
/* A0 = seg:ESI for string ops; source side honors segment overrides.  */
static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}
512 
/* A0 = ES:EDI for string ops; the destination side cannot be overridden.  */
static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}
517 
518 static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)
519 {
520     tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
521     tcg_gen_shli_tl(s->T0, s->T0, ot);
522 };
523 
/* Emit a sign- or zero-extension of SRC with width SIZE into DST and
   return DST; for full-width sizes no op is emitted and SRC itself is
   returned, so callers must not assume the result aliases DST.  */
static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        /* Already at full target width: nothing to do.  */
        return src;
    }
}
554 
/* Zero-extend REG in place from width OT.  */
static void gen_extu(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}
559 
/* Sign-extend REG in place from width OT.  */
static void gen_exts(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}
564 
/* Branch to LABEL1 if ECX (truncated to SIZE) is non-zero.  */
static inline
void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
}
572 
/* Branch to LABEL1 if ECX (truncated to SIZE) is zero.  */
static inline
void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
}
580 
/* Dispatch an IN helper call of width OT: read port N into V.  */
static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        tcg_abort();
    }
}
597 
/* Dispatch an OUT helper call of width OT: write V to port N.  */
static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        tcg_abort();
    }
}
614 
615 static void gen_check_io(DisasContext *s, MemOp ot, target_ulong cur_eip,
616                          uint32_t svm_flags)
617 {
618     target_ulong next_eip;
619 
620     if (s->pe && (s->cpl > s->iopl || s->vm86)) {
621         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
622         switch (ot) {
623         case MO_8:
624             gen_helper_check_iob(cpu_env, s->tmp2_i32);
625             break;
626         case MO_16:
627             gen_helper_check_iow(cpu_env, s->tmp2_i32);
628             break;
629         case MO_32:
630             gen_helper_check_iol(cpu_env, s->tmp2_i32);
631             break;
632         default:
633             tcg_abort();
634         }
635     }
636     if(s->flags & HF_GUEST_MASK) {
637         gen_update_cc_op(s);
638         gen_jmp_im(s, cur_eip);
639         svm_flags |= (1 << (4 + ot));
640         next_eip = s->pc - s->cs_base;
641         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
642         gen_helper_svm_check_io(cpu_env, s->tmp2_i32,
643                                 tcg_const_i32(svm_flags),
644                                 tcg_const_i32(next_eip - cur_eip));
645     }
646 }
647 
/* Generate one MOVS iteration: load from seg:ESI, store to ES:EDI,
   then advance ESI and EDI by the DF-scaled increment.  */
static inline void gen_movs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}
658 
/* Record T0 as the lazy-flags result (CC_DST only).  */
static void gen_op_update1_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}
663 
/* Record T1 as CC_SRC and T0 as CC_DST for lazy flag evaluation.  */
static void gen_op_update2_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}
669 
/* Record REG as CC_SRC2, T1 as CC_SRC, T0 as CC_DST (three-operand
   lazy flags, e.g. ADC/SBB).  */
static void gen_op_update3_cc(DisasContext *s, TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}
676 
/* Record T0 & T1 as CC_DST, as produced by TEST.  */
static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
{
    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
}
681 
/* Record lazy flags for NEG: CC_DST = result, CC_SRC = -result,
   and cc_srcT = 0 so the SUB-style carry computation works.  */
static void gen_op_update_neg_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
    tcg_gen_neg_tl(cpu_cc_src, s->T0);
    tcg_gen_movi_tl(s->cc_srcT, 0);
}
688 
/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        /* Already materialized in cc_src.  */
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        /* CLR means all flags zero except Z and P, which are set.  */
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    zero = NULL;
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        /* Feed a shared zero temp in place of each dead operand.  */
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}
733 
/* Description of a not-yet-emitted condition test: the condition
   holds iff "(reg & mask) cond imm" (or "reg cond reg2" when
   use_reg2 is set).  */
typedef struct CCPrepare {
    TCGCond cond;       /* comparison to apply */
    TCGv reg;           /* left-hand operand */
    TCGv reg2;          /* right-hand operand, if use_reg2 */
    target_ulong imm;   /* right-hand immediate, if !use_reg2 */
    target_ulong mask;  /* mask applied to reg first; -1 for none */
    bool use_reg2;      /* compare against reg2 instead of imm */
    bool no_setcond;    /* reg already holds the 0/1 (or inverted) value */
} CCPrepare;
743 
/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
        tcg_gen_mov_tl(t0, s->cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        /* These ops leave carry clear by definition.  */
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        /* INC/DEC preserve C; it was saved in CC_SRC.  */
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        /* C is set iff the high part (saved in CC_SRC) is non-zero.  */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        /* C is kept directly in CC_DST for ADCX.  */
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}
817 
/* compute eflags.P to reg; parity is never tracked lazily, so all
   flags are materialized first.  */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}
825 
/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        /* S lives in bit CC_S of the materialized flags.  */
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        /* S is always clear for these ops.  */
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            /* S is the sign bit of the (size-truncated) result.  */
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}
850 
/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        /* O is kept directly in CC_SRC2 for ADOX.  */
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        /* O is always clear for these ops.  */
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}
868 
/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        /* CLR always sets Z.  */
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    case CC_OP_POPCNT:
        /* Z is set iff the popcount source (in CC_SRC) was zero.  */
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
                             .mask = -1 };
    default:
        {
            /* Z is set iff the (size-truncated) result is zero.  */
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}
895 
/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranted not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    MemOp size;
    CCPrepare cc;
    TCGv t0;

    /* Bit 0 of B inverts the condition; bits 3..1 select it.  */
    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            /* Unsigned src1 <= src2, straight from the saved operands.  */
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_extu(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            /* Signed compare of the saved operands.  */
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_exts(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            /* L is S != O; align the bits and XOR them.  */
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            /* LE is (S != O) || Z; same XOR trick, plus the Z bit.  */
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}
993 
/* Store 0/1 into REG according to jump-opcode value B (SETcc).  */
static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        /* cc.reg already holds the (possibly inverted) boolean.  */
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    /* Single-bit mask tested against 0: shift the bit down instead of
       emitting a setcond.  */
    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}
1023 
/* Compute the carry flag as a 0/1 value into 'reg'.  CF is exactly the
   "below" condition, hence JCC_B.  */
static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}
1028 
/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    /* Reduce the value under test to the relevant bits, if any.  */
    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}
1045 
/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    /* The branch splits the flow of control; the flags state is no
       longer statically known on either path, hence CC_OP_DYNAMIC.  */
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}
1065 
1066 /* XXX: does not work with gdbstub "ice" single step - not a
1067    serious problem */
/* Emit "if ECX == 0 goto next_eip" for REP string ops.  Returns a label
   (l2) that other code can branch to in order to exit the REP loop and
   continue at next_eip.  */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    /* ECX != 0: skip the exit path and fall through to the string op.  */
    gen_op_jnz_ecx(s, s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}
1078 
/* STOS: store the low 'ot' bytes of EAX at ES:[EDI], then advance EDI
   by +/- element size depending on the direction flag.  */
static inline void gen_stos(DisasContext *s, MemOp ot)
{
    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}
1087 
/* LODS: load an element from DS:[ESI] into EAX, then advance ESI by
   +/- element size depending on the direction flag.  */
static inline void gen_lods(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
}
1096 
/* SCAS: compare EAX with the element at ES:[EDI] (sets flags only),
   then advance EDI.  */
static inline void gen_scas(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}
1105 
/* CMPS: compare the element at DS:[ESI] with the one at ES:[EDI]
   (sets flags only), then advance both ESI and EDI.  The [EDI] value
   goes to T1; gen_op with OR_TMP0 loads [ESI] itself.  */
static inline void gen_cmps(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}
1116 
/* If I/O breakpoints are active (HF_IOBPT_MASK), call the bpt_io
   helper with the port, the access size (1 << ot bytes), and the EIP
   of the next instruction (presumably used as the resume/report
   address — helper not visible here).  */
static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
    }
}
1128 
1129 
/* INS: read from the I/O port in DX and store at ES:[EDI], then
   advance EDI and check for I/O breakpoints.  */
static inline void gen_ins(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(s->T0, 0);
    gen_op_st_v(s, ot, s->T0, s->A0);
    /* The port number is the low 16 bits of EDX.  */
    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    gen_helper_in_func(ot, s->T0, s->tmp2_i32);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}
1145 
/* OUTS: load an element from DS:[ESI] and write it to the I/O port in
   DX, then advance ESI and check for I/O breakpoints.  */
static inline void gen_outs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);

    /* The port number is the low 16 bits of EDX.  */
    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
    gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}
1159 
1160 /* same method as Valgrind : we generate jumps to current or next
1161    instruction */
/*
 * Emit one iteration of a REP-prefixed string instruction: exit to
 * next_eip when ECX is zero, otherwise run the string op, decrement
 * ECX, optionally re-test ECX (repz_opt), and jump back to cur_eip to
 * iterate.  One guest iteration per TB keeps this restartable.
 */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}
1177 
/*
 * As GEN_REPZ, but for string ops that also terminate on ZF (SCAS,
 * CMPS): 'nz' selects REPNZ (stop when ZF set) vs REPZ (stop when ZF
 * clear); the gen_jcc1 branch exits the loop via l2 accordingly.
 */
#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}
1195 
/* Instantiate the REP wrappers; SCAS and CMPS also test ZF, so they
   use the two-condition variant.  */
GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)
1203 
1204 static void gen_helper_fp_arith_ST0_FT0(int op)
1205 {
1206     switch (op) {
1207     case 0:
1208         gen_helper_fadd_ST0_FT0(cpu_env);
1209         break;
1210     case 1:
1211         gen_helper_fmul_ST0_FT0(cpu_env);
1212         break;
1213     case 2:
1214         gen_helper_fcom_ST0_FT0(cpu_env);
1215         break;
1216     case 3:
1217         gen_helper_fcom_ST0_FT0(cpu_env);
1218         break;
1219     case 4:
1220         gen_helper_fsub_ST0_FT0(cpu_env);
1221         break;
1222     case 5:
1223         gen_helper_fsubr_ST0_FT0(cpu_env);
1224         break;
1225     case 6:
1226         gen_helper_fdiv_ST0_FT0(cpu_env);
1227         break;
1228     case 7:
1229         gen_helper_fdivr_ST0_FT0(cpu_env);
1230         break;
1231     }
1232 }
1233 
/* NOTE the exception in "r" op ordering: compared with the ST0/FT0
   variant above, cases 4/5 and 6/7 call the reversed/non-reversed
   helpers swapped, matching the hardware encoding for an ST(i)
   destination.  Ops 2 and 3 (compares) intentionally emit nothing.  */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}
1259 
/* Raise guest exception 'trapno' at EIP 'cur_eip'.  Flags and EIP are
   synchronized to the CPU state first; the helper does not return, so
   the TB ends here (DISAS_NORETURN).  */
static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
    s->base.is_jmp = DISAS_NORETURN;
}
1267 
/* Generate #UD for the current instruction.  The assumption here is that
   the instruction is known, but it isn't allowed in the current cpu mode.
   EIP is rewound to the start of the faulting instruction.  */
static void gen_illegal_opcode(DisasContext *s)
{
    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
}
1274 
/* Emit one basic ALU operation 'op' of size 'ot'.  The left operand /
   destination is register 'd', or the memory operand addressed by A0
   if d == OR_TMP0; the right operand is in T1.  With a LOCK prefix the
   memory read-modify-write is emitted as a single atomic TCG op.  */
static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
{
    if (d != OR_TMP0) {
        if (s1->prefix & PREFIX_LOCK) {
            /* Lock prefix when destination is not memory.  */
            gen_illegal_opcode(s1);
            return;
        }
        gen_op_mov_v_reg(s1, ot, s1->T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        /* In the locked case the load is folded into the atomic op.  */
        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
    }
    switch(op) {
    case OP_ADCL:
        /* tmp4 receives the incoming carry as a 0/1 value.  */
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            /* dst - (src + c) expressed as an atomic add of the
               negated right-hand side.  */
            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
            tcg_gen_neg_tl(s1->T0, s1->T0);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
            /* fetch_add of -T1 yields the pre-op value, which is what
               cc_srcT must record; recompute the result into T0.  */
            tcg_gen_neg_tl(s1->T0, s1->T1);
            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
        } else {
            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        /* CMP writes flags only; no store, no LOCK special case.  */
        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}
1385 
/* Emit INC (c > 0) or DEC (c < 0) on destination 'd'; if d == OR_TMP0
   the destination is the memory operand addressed by A0.  */
static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
{
    if (s1->prefix & PREFIX_LOCK) {
        if (d != OR_TMP0) {
            /* Lock prefix when destination is not memory */
            gen_illegal_opcode(s1);
            return;
        }
        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                    s1->mem_index, ot | MO_LE);
    } else {
        if (d != OR_TMP0) {
            gen_op_mov_v_reg(s1, ot, s1->T0, d);
        } else {
            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
        }
        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
        gen_op_st_rm_T0_A0(s1, ot, d);
    }

    /* INC/DEC leave CF unchanged: capture the current CF into cc_src
       so the INC/DEC cc_op can reproduce it later.  */
    gen_compute_eflags_c(s1, cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
}
1412 
/* Update the CC state after a variable-count shift.  When count == 0
   the flags (and cc_op) must be left untouched, so movcond selects
   between old and new values.  'result' is the shifted value; 'shm1'
   is the value shifted by count-1, whose edge bit supplies CF.  */
static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
        oldop = s->tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}
1457 
/* SHL/SHR/SAR with a variable count in T1.  'op1' selects the
   register destination, or memory at A0 when op1 == OR_TMP0.  tmp0
   receives the value shifted by count-1 so gen_shift_flags can derive
   CF from its edge bit.  */
static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    /* The count is masked to 5 (or 6) bits, as on hardware.  */
    tcg_gen_andi_tl(s->T1, s->T1, mask);
    tcg_gen_subi_tl(s->tmp0, s->T1, 1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, s->T0);
            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_sar_tl(s->T0, s->T0, s->T1);
        } else {
            gen_extu(ot, s->T0);
            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_shr_tl(s->T0, s->T0, s->T1);
        }
    } else {
        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
        tcg_gen_shl_tl(s->T0, s->T0, s->T1);
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
}
1493 
/* SHL/SHR/SAR with an immediate count 'op2'.  Flags are updated only
   when the masked count is non-zero, matching hardware.  */
static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_v(s, ot, s->T0, s->A0);
    else
        gen_op_mov_v_reg(s, ot, s->T0, op1);

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, s->T0);
                /* tmp4 = value >> (count-1); its low bit is CF.  */
                tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_sari_tl(s->T0, s->T0, op2);
            } else {
                gen_extu(ot, s->T0);
                tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_shri_tl(s->T0, s->T0, op2);
            }
        } else {
            tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
            tcg_gen_shli_tl(s->T0, s->T0, op2);
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* update eflags if non zero shift */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, s->T0);
        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    }
}
1533 
/* ROL/ROR with a variable count in T1.  8- and 16-bit inputs are
   replicated so a 32-bit rotate produces the right result.  CF/OF are
   recovered from the rotated value afterwards; cc_op becomes dynamic
   because a zero count must leave the flags untouched.  */
static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
    TCGv_i32 t0, t1;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    tcg_gen_andi_tl(s->T1, s->T1, mask);

    switch (ot) {
    case MO_8:
        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
        tcg_gen_ext8u_tl(s->T0, s->T0);
        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
        goto do_long;
    case MO_16:
        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
        goto do_long;
    do_long:
#ifdef TARGET_X86_64
    case MO_32:
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
        if (is_right) {
            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        } else {
            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        }
        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
        break;
#endif
    default:
        if (is_right) {
            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
        } else {
            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
        }
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* We'll need the flags computed into CC_SRC.  */
    gen_compute_eflags(s);

    /* The value that was "rotated out" is now present at the other end
       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
       since we've computed the flags into CC_SRC, these variables are
       currently dead.  */
    if (is_right) {
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
    } else {
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
    }
    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);

    /* Now conditionally store the new CC_OP value.  If the shift count
       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
       Otherwise reuse CC_OP_ADCOX which have the C and O flags split out
       exactly as we computed above.  */
    t0 = tcg_const_i32(0);
    t1 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(t1, s->T1);
    tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
    tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                        s->tmp2_i32, s->tmp3_i32);
    tcg_temp_free_i32(t0);
    tcg_temp_free_i32(t1);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}
1618 
/* ROL/ROR with an immediate count 'op2'.  Because the count is known
   at translate time, the zero-count case needs no runtime movcond:
   flags are simply not touched when op2 masks to zero.  */
static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
                          int is_right)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);
    int shift;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        switch (ot) {
#ifdef TARGET_X86_64
        case MO_32:
            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
            if (is_right) {
                tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
            } else {
                tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
            }
            tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
            break;
#endif
        default:
            if (is_right) {
                tcg_gen_rotri_tl(s->T0, s->T0, op2);
            } else {
                tcg_gen_rotli_tl(s->T0, s->T0, op2);
            }
            break;
        case MO_8:
            mask = 7;
            goto do_shifts;
        case MO_16:
            mask = 15;
        do_shifts:
            /* Narrow rotate built from two shifts and an or; a right
               rotate by N is a left rotate by width - N.  */
            shift = op2 & mask;
            if (is_right) {
                shift = mask + 1 - shift;
            }
            gen_extu(ot, s->T0);
            tcg_gen_shli_tl(s->tmp0, s->T0, shift);
            tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
            tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
            break;
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    if (op2 != 0) {
        /* Compute the flags into CC_SRC.  */
        gen_compute_eflags(s);

        /* The value that was "rotated out" is now present at the other end
           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
           since we've computed the flags into CC_SRC, these variables are
           currently dead.  */
        if (is_right) {
            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
            tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
        } else {
            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
        }
        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
        set_cc_op(s, CC_OP_ADCOX);
    }
}
1695 
/* XXX: add faster immediate = 1 case */
/* RCL/RCR (rotate through carry) with a variable count in T1; the
   rotation is done entirely in per-size helpers, which is why the
   flags must already be in canonical CC_OP_EFLAGS form on entry.  */
static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
                           int is_right)
{
    gen_compute_eflags(s);
    assert(s->cc_op == CC_OP_EFLAGS);

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_v(s, ot, s->T0, s->A0);
    else
        gen_op_mov_v_reg(s, ot, s->T0, op1);

    if (is_right) {
        switch (ot) {
        case MO_8:
            gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
            break;
        case MO_16:
            gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
            break;
        case MO_32:
            gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
            break;
#ifdef TARGET_X86_64
        case MO_64:
            gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
            break;
#endif
        default:
            tcg_abort();
        }
    } else {
        switch (ot) {
        case MO_8:
            gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
            break;
        case MO_16:
            gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
            break;
        case MO_32:
            gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
            break;
#ifdef TARGET_X86_64
        case MO_64:
            gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
            break;
#endif
        default:
            tcg_abort();
        }
    }
    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);
}
1751 
/* XXX: add faster immediate case */
/* SHLD/SHRD (double-precision shift) of T0 with fill bits from T1, by
   the count in 'count_in'.  Where possible the two operands are
   concatenated into one wider value so a single shift suffices.  */
static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
                             bool is_right, TCGv count_in)
{
    target_ulong mask = (ot == MO_64 ? 63 : 31);
    TCGv count;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    count = tcg_temp_new();
    tcg_gen_andi_tl(count, count_in, mask);

    switch (ot) {
    case MO_16:
        /* Note: we implement the Intel behaviour for shift count > 16.
           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
           portion by constructing it as a 32-bit value.  */
        if (is_right) {
            tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
            tcg_gen_mov_tl(s->T1, s->T0);
            tcg_gen_mov_tl(s->T0, s->tmp0);
        } else {
            tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
        }
        /*
         * If TARGET_X86_64 defined then fall through into MO_32 case,
         * otherwise fall through default case.
         */
    case MO_32:
#ifdef TARGET_X86_64
        /* Concatenate the two 32-bit values and use a 64-bit shift.  */
        tcg_gen_subi_tl(s->tmp0, count, 1);
        if (is_right) {
            tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
            tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
            tcg_gen_shr_i64(s->T0, s->T0, count);
        } else {
            tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
            tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
            tcg_gen_shl_i64(s->T0, s->T0, count);
            tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
            tcg_gen_shri_i64(s->T0, s->T0, 32);
        }
        break;
#endif
    default:
        /* tmp0 = value shifted by count-1, for the flags (CF).  */
        tcg_gen_subi_tl(s->tmp0, count, 1);
        if (is_right) {
            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);

            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
            tcg_gen_shr_tl(s->T0, s->T0, count);
            tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
        } else {
            tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
            if (ot == MO_16) {
                /* Only needed if count > 16, for Intel behaviour.  */
                tcg_gen_subfi_tl(s->tmp4, 33, count);
                tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
                tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
            }

            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
            tcg_gen_shl_tl(s->T0, s->T0, count);
            tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
        }
        /* Zero the fill bits when count is zero (shift by mask+1 above
           would otherwise have produced garbage).  */
        tcg_gen_movi_tl(s->tmp4, 0);
        tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
                           s->tmp4, s->T1);
        tcg_gen_or_tl(s->T0, s->T0, s->T1);
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
    tcg_temp_free(count);
}
1836 
/* Dispatch a shift/rotate group instruction: the count comes from
   register 's' (loaded into T1) unless s == OR_TMP1, in which case T1
   already holds it.  */
static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
{
    if (s != OR_TMP1)
        gen_op_mov_v_reg(s1, ot, s1->T1, s);
    switch(op) {
    case OP_ROL:
        gen_rot_rm_T1(s1, ot, d, 0);
        break;
    case OP_ROR:
        gen_rot_rm_T1(s1, ot, d, 1);
        break;
    case OP_SHL:
    case OP_SHL1:
        gen_shift_rm_T1(s1, ot, d, 0, 0);
        break;
    case OP_SHR:
        gen_shift_rm_T1(s1, ot, d, 1, 0);
        break;
    case OP_SAR:
        gen_shift_rm_T1(s1, ot, d, 1, 1);
        break;
    case OP_RCL:
        gen_rotc_rm_T1(s1, ot, d, 0);
        break;
    case OP_RCR:
        gen_rotc_rm_T1(s1, ot, d, 1);
        break;
    }
}
1866 
/* Dispatch a shift/rotate group instruction with immediate count 'c'.
   RCL/RCR have no immediate variant here; they fall back to the
   variable-count path via T1.  */
static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
{
    switch(op) {
    case OP_ROL:
        gen_rot_rm_im(s1, ot, d, c, 0);
        break;
    case OP_ROR:
        gen_rot_rm_im(s1, ot, d, c, 1);
        break;
    case OP_SHL:
    case OP_SHL1:
        gen_shift_rm_im(s1, ot, d, c, 0, 0);
        break;
    case OP_SHR:
        gen_shift_rm_im(s1, ot, d, c, 1, 0);
        break;
    case OP_SAR:
        gen_shift_rm_im(s1, ot, d, c, 1, 1);
        break;
    default:
        /* currently not optimized */
        tcg_gen_movi_tl(s1->T1, c);
        gen_shift(s1, op, ot, d, OR_TMP1);
        break;
    }
}
1893 
/* Architectural maximum length of one x86 instruction, in bytes.  */
#define X86_MAX_INSN_LENGTH 15

/*
 * Advance the decode pointer by NUM_BYTES and return its previous
 * value (the address of the bytes about to be consumed).  If the
 * instruction would exceed the 15-byte limit, abandon translation of
 * this insn via siglongjmp to s->jmpbuf; the sigsetjmp site (not
 * visible here) is expected to raise the resulting fault.
 */
static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
{
    uint64_t pc = s->pc;

    s->pc += num_bytes;
    if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
        /* If the instruction's 16th byte is on a different page than the 1st, a
         * page fault on the second page wins over the general protection fault
         * caused by the instruction being too long.
         * This can happen even if the operand is only one byte long!
         */
        if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
            /* Touch the second page: a #PF is raised here if it is unmapped. */
            volatile uint8_t unused =
                cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
            (void) unused;
        }
        siglongjmp(s->jmpbuf, 1);
    }

    return pc;
}
1917 
1918 static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
1919 {
1920     return translator_ldub(env, advance_pc(env, s, 1));
1921 }
1922 
1923 static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
1924 {
1925     return translator_ldsw(env, advance_pc(env, s, 2));
1926 }
1927 
1928 static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
1929 {
1930     return translator_lduw(env, advance_pc(env, s, 2));
1931 }
1932 
1933 static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
1934 {
1935     return translator_ldl(env, advance_pc(env, s, 4));
1936 }
1937 
#ifdef TARGET_X86_64
/* Fetch a 64-bit immediate from the instruction stream (64-bit only).  */
static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
{
    uint64_t pc = advance_pc(env, s, 8);

    return translator_ldq(env, pc);
}
#endif
1944 
/* Decompose an address.  */

typedef struct AddressParts {
    int def_seg;        /* default segment: R_DS, or R_SS for stack refs */
    int base;           /* base register; -1 = none, -2 = RIP-relative */
    int index;          /* index register, or -1 if none */
    int scale;          /* log2 of the index scale factor */
    target_long disp;   /* constant displacement */
} AddressParts;
1954 
/*
 * Decode a ModRM byte, plus any trailing SIB byte and displacement,
 * into an AddressParts value.  Consumes the extra bytes from the
 * instruction stream; emits no TCG code.
 */
static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
                                    int modrm)
{
    int def_seg, base, index, scale, mod, rm;
    target_long disp;
    bool havesib;

    def_seg = R_DS;
    index = -1;
    scale = 0;
    disp = 0;

    mod = (modrm >> 6) & 3;
    rm = modrm & 7;
    base = rm | REX_B(s);

    if (mod == 3) {
        /* Normally filtered out earlier, but including this path
           simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
        goto done;
    }

    switch (s->aflag) {
    case MO_64:
    case MO_32:
        havesib = 0;
        if (rm == 4) {
            /* rm == 4 signals that an SIB byte follows.  */
            int code = x86_ldub_code(env, s);
            scale = (code >> 6) & 3;
            index = ((code >> 3) & 7) | REX_X(s);
            if (index == 4) {
                index = -1;  /* no index */
            }
            base = (code & 7) | REX_B(s);
            havesib = 1;
        }

        switch (mod) {
        case 0:
            if ((base & 7) == 5) {
                /* No base, just a 32-bit displacement ...  */
                base = -1;
                disp = (int32_t)x86_ldl_code(env, s);
                if (CODE64(s) && !havesib) {
                    /* ... which is RIP-relative in 64-bit mode.  */
                    base = -2;
                    disp += s->pc + s->rip_offset;
                }
            }
            break;
        case 1:
            disp = (int8_t)x86_ldub_code(env, s);
            break;
        default:
        case 2:
            disp = (int32_t)x86_ldl_code(env, s);
            break;
        }

        /* For correct popl handling with esp.  */
        if (base == R_ESP && s->popl_esp_hack) {
            disp += s->popl_esp_hack;
        }
        /* EBP- and ESP-based addressing defaults to the stack segment.  */
        if (base == R_EBP || base == R_ESP) {
            def_seg = R_SS;
        }
        break;

    case MO_16:
        /* 16-bit addressing: fixed base/index pairs selected by rm.  */
        if (mod == 0) {
            if (rm == 6) {
                /* mod=0, rm=6 means a bare 16-bit displacement.  */
                base = -1;
                disp = x86_lduw_code(env, s);
                break;
            }
        } else if (mod == 1) {
            disp = (int8_t)x86_ldub_code(env, s);
        } else {
            disp = (int16_t)x86_lduw_code(env, s);
        }

        switch (rm) {
        case 0:
            base = R_EBX;
            index = R_ESI;
            break;
        case 1:
            base = R_EBX;
            index = R_EDI;
            break;
        case 2:
            base = R_EBP;
            index = R_ESI;
            def_seg = R_SS;
            break;
        case 3:
            base = R_EBP;
            index = R_EDI;
            def_seg = R_SS;
            break;
        case 4:
            base = R_ESI;
            break;
        case 5:
            base = R_EDI;
            break;
        case 6:
            base = R_EBP;
            def_seg = R_SS;
            break;
        default:
        case 7:
            base = R_EBX;
            break;
        }
        break;

    default:
        tcg_abort();
    }

 done:
    return (AddressParts){ def_seg, base, index, scale, disp };
}
2077 
2078 /* Compute the address, with a minimum number of TCG ops.  */
2079 static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2080 {
2081     TCGv ea = NULL;
2082 
2083     if (a.index >= 0) {
2084         if (a.scale == 0) {
2085             ea = cpu_regs[a.index];
2086         } else {
2087             tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2088             ea = s->A0;
2089         }
2090         if (a.base >= 0) {
2091             tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2092             ea = s->A0;
2093         }
2094     } else if (a.base >= 0) {
2095         ea = cpu_regs[a.base];
2096     }
2097     if (!ea) {
2098         tcg_gen_movi_tl(s->A0, a.disp);
2099         ea = s->A0;
2100     } else if (a.disp != 0) {
2101         tcg_gen_addi_tl(s->A0, ea, a.disp);
2102         ea = s->A0;
2103     }
2104 
2105     return ea;
2106 }
2107 
2108 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2109 {
2110     AddressParts a = gen_lea_modrm_0(env, s, modrm);
2111     TCGv ea = gen_lea_modrm_1(s, a);
2112     gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2113 }
2114 
/*
 * Decode and discard a modrm operand: consumes the SIB/displacement
 * bytes (keeping s->pc in sync) without emitting any code.  Used for
 * multi-byte NOPs and similar insns whose memory operand is ignored.
 */
static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
{
    (void)gen_lea_modrm_0(env, s, modrm);
}
2119 
/* Used for BNDCL, BNDCU, BNDCN: compare the effective address of the
   modrm operand against bound BNDV with condition COND, and pass the
   result (as 0/1) to the bndck helper.  */
static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
                      TCGCond cond, TCGv_i64 bndv)
{
    TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));

    tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
    if (!CODE64(s)) {
        /* Outside 64-bit mode only the low 32 address bits matter.  */
        tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
    }
    tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
    tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
    gen_helper_bndck(cpu_env, s->tmp2_i32);
}
2134 
/* used for LEA and MOV AX, mem: apply the (overridable) DS segment
   base to the address already in A0.  */
static void gen_add_A0_ds_seg(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
}
2140 
2141 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2142    OR_TMP0 */
2143 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2144                            MemOp ot, int reg, int is_store)
2145 {
2146     int mod, rm;
2147 
2148     mod = (modrm >> 6) & 3;
2149     rm = (modrm & 7) | REX_B(s);
2150     if (mod == 3) {
2151         if (is_store) {
2152             if (reg != OR_TMP0)
2153                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2154             gen_op_mov_reg_v(s, ot, rm, s->T0);
2155         } else {
2156             gen_op_mov_v_reg(s, ot, s->T0, rm);
2157             if (reg != OR_TMP0)
2158                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2159         }
2160     } else {
2161         gen_lea_modrm(env, s, modrm);
2162         if (is_store) {
2163             if (reg != OR_TMP0)
2164                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2165             gen_op_st_v(s, ot, s->T0, s->A0);
2166         } else {
2167             gen_op_ld_v(s, ot, s->T0, s->A0);
2168             if (reg != OR_TMP0)
2169                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2170         }
2171     }
2172 }
2173 
2174 static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot)
2175 {
2176     uint32_t ret;
2177 
2178     switch (ot) {
2179     case MO_8:
2180         ret = x86_ldub_code(env, s);
2181         break;
2182     case MO_16:
2183         ret = x86_lduw_code(env, s);
2184         break;
2185     case MO_32:
2186 #ifdef TARGET_X86_64
2187     case MO_64:
2188 #endif
2189         ret = x86_ldl_code(env, s);
2190         break;
2191     default:
2192         tcg_abort();
2193     }
2194     return ret;
2195 }
2196 
2197 static inline int insn_const_size(MemOp ot)
2198 {
2199     if (ot <= MO_32) {
2200         return 1 << ot;
2201     } else {
2202         return 4;
2203     }
2204 }
2205 
/* Whether a direct (chained) jump to PC is permissible: the target must
   share a page with the TB start or with the current insn.  User-only
   builds always allow it.  */
static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
{
#ifndef CONFIG_USER_ONLY
    return (pc & TARGET_PAGE_MASK) == (s->base.tb->pc & TARGET_PAGE_MASK) ||
           (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
#else
    return true;
#endif
}
2215 
/* Emit a jump to EIP as exit TB_NUM of the current block, chaining
   directly when the target page allows it.  */
static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
{
    target_ulong pc = s->cs_base + eip;

    if (use_goto_tb(s, pc))  {
        /* jump to same page: we can use a direct jump */
        tcg_gen_goto_tb(tb_num);
        gen_jmp_im(s, eip);
        tcg_gen_exit_tb(s->base.tb, tb_num);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        /* jump to another page */
        gen_jmp_im(s, eip);
        gen_jr(s, s->tmp0);
    }
}
2232 
/* Emit a conditional jump: taken -> VAL, not taken -> NEXT_EIP.
   With jmp_opt both edges chain via goto_tb; otherwise both paths
   store EIP and the block ends with a generic gen_eob.  */
static inline void gen_jcc(DisasContext *s, int b,
                           target_ulong val, target_ulong next_eip)
{
    TCGLabel *l1, *l2;

    if (s->jmp_opt) {
        l1 = gen_new_label();
        gen_jcc1(s, b, l1);

        /* Fall-through edge.  */
        gen_goto_tb(s, 0, next_eip);

        gen_set_label(l1);
        /* Taken edge.  */
        gen_goto_tb(s, 1, val);
    } else {
        l1 = gen_new_label();
        l2 = gen_new_label();
        gen_jcc1(s, b, l1);

        gen_jmp_im(s, next_eip);
        tcg_gen_br(l2);

        gen_set_label(l1);
        gen_jmp_im(s, val);
        gen_set_label(l2);
        gen_eob(s);
    }
}
2260 
/* Emit CMOVcc: copy the r/m operand into REG iff condition B holds.  */
static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b,
                        int modrm, int reg)
{
    CCPrepare cc;

    /* Load the source operand into T0.  */
    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);

    cc = gen_prepare_cc(s, b, s->T1);
    if (cc.mask != -1) {
        /* Mask the relevant condition bits into a fresh temp.  */
        TCGv t0 = tcg_temp_new();
        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
        cc.reg = t0;
    }
    if (!cc.use_reg2) {
        cc.reg2 = tcg_const_tl(cc.imm);
    }

    /* T0 = cond ? T0 : old reg value; the write-back is unconditional.  */
    tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
                       s->T0, cpu_regs[reg]);
    gen_op_mov_reg_v(s, ot, reg, s->T0);

    if (cc.mask != -1) {
        tcg_temp_free(cc.reg);
    }
    if (!cc.use_reg2) {
        tcg_temp_free(cc.reg2);
    }
}
2289 
/* Load the selector of SEG_REG into T0.  */
static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg)
{
    tcg_gen_ld32u_tl(s->T0, cpu_env,
                     offsetof(CPUX86State,segs[seg_reg].selector));
}
2295 
/* Store T0 into SEG_REG's selector, real/vm86 style: no descriptor
   load, the segment base is simply selector << 4.  */
static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg)
{
    tcg_gen_ext16u_tl(s->T0, s->T0);
    tcg_gen_st32_tl(s->T0, cpu_env,
                    offsetof(CPUX86State,segs[seg_reg].selector));
    tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
}
2303 
/* move T0 to seg_reg and compute if the CPU state may change. Never
   call this function with seg_reg == R_CS */
static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
{
    if (s->pe && !s->vm86) {
        /* Protected mode: full descriptor load via helper (may fault).  */
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
        /* abort translation because the addseg value may change or
           because ss32 may change. For R_SS, translation must always
           stop as a special handling must be done to disable hardware
           interrupts for the next instruction */
        if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS)) {
            s->base.is_jmp = DISAS_TOO_MANY;
        }
    } else {
        /* Real or vm86 mode: base = selector << 4, no faults.  */
        gen_op_movl_seg_T0_vm(s, seg_reg);
        if (seg_reg == R_SS) {
            s->base.is_jmp = DISAS_TOO_MANY;
        }
    }
}
2325 
2326 static inline int svm_is_rep(int prefixes)
2327 {
2328     return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2329 }
2330 
2331 static inline void
2332 gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2333                               uint32_t type, uint64_t param)
2334 {
2335     /* no SVM activated; fast case */
2336     if (likely(!(s->flags & HF_GUEST_MASK)))
2337         return;
2338     gen_update_cc_op(s);
2339     gen_jmp_im(s, pc_start - s->cs_base);
2340     gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2341                                          tcg_const_i64(param));
2342 }
2343 
/* Convenience wrapper: SVM intercept check with a zero parameter.  */
static inline void
gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
{
    gen_svm_check_intercept_param(s, pc_start, type, 0);
}
2349 
/* Add ADDEND to ESP, using the current stack-pointer width.  */
static inline void gen_stack_update(DisasContext *s, int addend)
{
    gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
}
2354 
/* Generate a push. It depends on ss32, addseg and dflag.  */
static void gen_push_v(DisasContext *s, TCGv val)
{
    MemOp d_ot = mo_pushpop(s, s->dflag);   /* data size */
    MemOp a_ot = mo_stacksize(s);           /* stack pointer size */
    int size = 1 << d_ot;
    TCGv new_esp = s->A0;

    tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);

    if (!CODE64(s)) {
        if (s->addseg) {
            /* A0 is about to gain the SS base, so preserve the raw
               decremented ESP for the register write-back below.  */
            new_esp = s->tmp4;
            tcg_gen_mov_tl(new_esp, s->A0);
        }
        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
    }

    /* Store before updating ESP, so a faulting store leaves ESP intact.  */
    gen_op_st_v(s, d_ot, val, s->A0);
    gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
}
2376 
/* two step pop is necessary for precise exceptions: load the value
   into T0 here, and only adjust ESP afterwards via gen_pop_update,
   once the insn can no longer fault.  Returns the data size used.  */
static MemOp gen_pop_T0(DisasContext *s)
{
    MemOp d_ot = mo_pushpop(s, s->dflag);

    gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
    gen_op_ld_v(s, d_ot, s->T0, s->A0);

    return d_ot;
}
2387 
/* Second half of a pop: bump ESP past the value read by gen_pop_T0.  */
static inline void gen_pop_update(DisasContext *s, MemOp ot)
{
    gen_stack_update(s, 1 << ot);
}
2392 
/* Load A0 with the current stack top (ESP plus the SS base).  */
static inline void gen_stack_A0(DisasContext *s)
{
    gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
}
2397 
2398 static void gen_pusha(DisasContext *s)
2399 {
2400     MemOp s_ot = s->ss32 ? MO_32 : MO_16;
2401     MemOp d_ot = s->dflag;
2402     int size = 1 << d_ot;
2403     int i;
2404 
2405     for (i = 0; i < 8; i++) {
2406         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2407         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2408         gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2409     }
2410 
2411     gen_stack_update(s, -8 * size);
2412 }
2413 
2414 static void gen_popa(DisasContext *s)
2415 {
2416     MemOp s_ot = s->ss32 ? MO_32 : MO_16;
2417     MemOp d_ot = s->dflag;
2418     int size = 1 << d_ot;
2419     int i;
2420 
2421     for (i = 0; i < 8; i++) {
2422         /* ESP is not reloaded */
2423         if (7 - i == R_ESP) {
2424             continue;
2425         }
2426         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2427         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2428         gen_op_ld_v(s, d_ot, s->T0, s->A0);
2429         gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2430     }
2431 
2432     gen_stack_update(s, 8 * size);
2433 }
2434 
/* Emit the ENTER instruction: push EBP, optionally copy LEVEL-1 frame
   pointers from the old frame plus the new frame pointer itself, then
   set EBP to the new frame and reserve ESP_ADDEND extra stack bytes.  */
static void gen_enter(DisasContext *s, int esp_addend, int level)
{
    MemOp d_ot = mo_pushpop(s, s->dflag);
    MemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
    int size = 1 << d_ot;

    /* Push BP; compute FrameTemp into T1.  */
    tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
    gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);

    level &= 31;    /* the architectural nesting level is mod 32 */
    if (level != 0) {
        int i;

        /* Copy level-1 pointers from the previous frame.  */
        for (i = 1; i < level; ++i) {
            tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
            gen_op_ld_v(s, d_ot, s->tmp0, s->A0);

            tcg_gen_subi_tl(s->A0, s->T1, size * i);
            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
            gen_op_st_v(s, d_ot, s->tmp0, s->A0);
        }

        /* Push the current FrameTemp as the last level.  */
        tcg_gen_subi_tl(s->A0, s->T1, size * level);
        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
        gen_op_st_v(s, d_ot, s->T1, s->A0);
    }

    /* Copy the FrameTemp value to EBP.  */
    gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);

    /* Compute the final value of ESP.  */
    tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
}
2474 
/* Emit the LEAVE instruction: reload EBP from the frame it points to,
   and set ESP just past the restored slot.  */
static void gen_leave(DisasContext *s)
{
    MemOp d_ot = mo_pushpop(s, s->dflag);
    MemOp a_ot = mo_stacksize(s);

    gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
    gen_op_ld_v(s, d_ot, s->T0, s->A0);

    tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);

    /* Write EBP last so a faulting load leaves it untouched.  */
    gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
}
2488 
2489 /* Similarly, except that the assumption here is that we don't decode
2490    the instruction at all -- either a missing opcode, an unimplemented
2491    feature, or just a bogus instruction stream.  */
2492 static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2493 {
2494     gen_illegal_opcode(s);
2495 
2496     if (qemu_loglevel_mask(LOG_UNIMP)) {
2497         FILE *logfile = qemu_log_lock();
2498         target_ulong pc = s->pc_start, end = s->pc;
2499 
2500         qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2501         for (; pc < end; ++pc) {
2502             qemu_log(" %02x", cpu_ldub_code(env, pc));
2503         }
2504         qemu_log("\n");
2505         qemu_log_unlock(logfile);
2506     }
2507 }
2508 
/* an interrupt is different from an exception because of the
   privilege checks.  CUR_EIP is the faulting insn, NEXT_EIP the return
   address pushed on the stack; their difference is passed to the
   helper as the instruction length.  */
static void gen_interrupt(DisasContext *s, int intno,
                          target_ulong cur_eip, target_ulong next_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
                               tcg_const_i32(next_eip - cur_eip));
    s->base.is_jmp = DISAS_NORETURN;
}
2520 
/* Sync state at CUR_EIP and call the debug-exception helper; the
   helper does not return to generated code.  */
static void gen_debug(DisasContext *s, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_debug(cpu_env);
    s->base.is_jmp = DISAS_NORETURN;
}
2528 
2529 static void gen_set_hflag(DisasContext *s, uint32_t mask)
2530 {
2531     if ((s->flags & mask) == 0) {
2532         TCGv_i32 t = tcg_temp_new_i32();
2533         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2534         tcg_gen_ori_i32(t, t, mask);
2535         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2536         tcg_temp_free_i32(t);
2537         s->flags |= mask;
2538     }
2539 }
2540 
2541 static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2542 {
2543     if (s->flags & mask) {
2544         TCGv_i32 t = tcg_temp_new_i32();
2545         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2546         tcg_gen_andi_i32(t, t, ~mask);
2547         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2548         tcg_temp_free_i32(t);
2549         s->flags &= ~mask;
2550     }
2551 }
2552 
/* Clear BND registers during legacy branches.  */
static void gen_bnd_jmp(DisasContext *s)
{
    /* Clear the registers only if BND prefix is missing, MPX is enabled,
       and if the BNDREGs are known to be in use (non-zero) already.
       The helper itself will check BNDPRESERVE at runtime.  */
    if ((s->prefix & PREFIX_REPNZ) == 0
        && (s->flags & HF_MPX_EN_MASK) != 0
        && (s->flags & HF_MPX_IU_MASK) != 0) {
        gen_helper_bnd_jmp(cpu_env);
    }
}
2565 
/* Generate an end of block. Trace exception is also generated if needed.
   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
   If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
   S->TF.  This is used by the syscall/sysret insns.
   If JR, exit via lookup_and_goto_ptr (indirect-branch fast path)
   instead of a plain exit_tb.  */
static void
do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
{
    gen_update_cc_op(s);

    /* If several instructions disable interrupts, only the first does it.  */
    if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
        gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
    } else {
        gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
    }

    if (s->base.tb->flags & HF_RF_MASK) {
        gen_helper_reset_rf(cpu_env);
    }
    /* Exactly one exit path is taken, in decreasing priority.  */
    if (s->base.singlestep_enabled) {
        gen_helper_debug(cpu_env);
    } else if (recheck_tf) {
        gen_helper_rechecking_single_step(cpu_env);
        tcg_gen_exit_tb(NULL, 0);
    } else if (s->tf) {
        gen_helper_single_step(cpu_env);
    } else if (jr) {
        tcg_gen_lookup_and_goto_ptr();
    } else {
        tcg_gen_exit_tb(NULL, 0);
    }
    s->base.is_jmp = DISAS_NORETURN;
}
2599 
/* End-of-block without the indirect-jump (jr) fast path.  */
static inline void
gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
{
    do_gen_eob_worker(s, inhibit, recheck_tf, false);
}
2605 
/* End of block.
   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
{
    gen_eob_worker(s, inhibit, false);
}
2612 
/* End of block, resetting the inhibit irq flag.  */
static void gen_eob(DisasContext *s)
{
    gen_eob_worker(s, false, false);
}
2618 
/* Jump to register */
static void gen_jr(DisasContext *s, TCGv dest)
{
    /* DEST is unused here: callers store the target EIP to env before
       calling, and lookup_and_goto_ptr resolves the TB from env state.  */
    do_gen_eob_worker(s, false, false, true);
}
2624 
/* generate a jump to eip. No segment change must happen before as a
   direct call to the next block may occur */
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
{
    gen_update_cc_op(s);
    set_cc_op(s, CC_OP_DYNAMIC);
    if (s->jmp_opt) {
        gen_goto_tb(s, tb_num, eip);
    } else {
        gen_jmp_im(s, eip);
        gen_eob(s);
    }
}
2638 
/* Unconditional jump to EIP via TB exit slot 0.  */
static void gen_jmp(DisasContext *s, target_ulong eip)
{
    gen_jmp_tb(s, eip, 0);
}
2643 
/* Load 8 bytes from [A0] into the env field at OFFSET.  */
static inline void gen_ldq_env_A0(DisasContext *s, int offset)
{
    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
}
2649 
/* Store 8 bytes from the env field at OFFSET to [A0].  */
static inline void gen_stq_env_A0(DisasContext *s, int offset)
{
    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
}
2655 
/* Load a 16-byte XMM value from [A0] into the ZMMReg at OFFSET,
   as two little-endian 64-bit halves.  */
static inline void gen_ldo_env_A0(DisasContext *s, int offset)
{
    int mem_index = s->mem_index;
    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
}
2665 
/* Store the 16-byte ZMMReg at OFFSET to [A0], as two little-endian
   64-bit halves.  */
static inline void gen_sto_env_A0(DisasContext *s, int offset)
{
    int mem_index = s->mem_index;
    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
}
2675 
/* Copy a 16-byte XMM value between two env offsets, 64 bits at a time.  */
static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
{
    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
}
2683 
/* Copy a 64-bit value between two env offsets.  */
static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
{
    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
}
2689 
/* Copy a 32-bit value between two env offsets.  */
static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
{
    tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
    tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
}
2695 
/* Zero the 64-bit env field at D_OFFSET.  */
static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
{
    tcg_gen_movi_i64(s->tmp1_i64, 0);
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
}
2701 
/* SSE helper signatures.  The suffix encodes argument/return kinds:
   i = i32, l = i64, e = env pointer, p = register pointer, t = target
   word.  E.g. SSEFunc_0_eppi: void fn(env, reg, reg, i32).  */
typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                               TCGv_i32 val);
typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                               TCGv val);
2712 
/* Sentinels in the sse_op_table arrays: SSE_SPECIAL marks entries
   decoded by hand, SSE_DUMMY marks valid opcodes with no helper.  */
#define SSE_SPECIAL ((void *)1)
#define SSE_DUMMY ((void *)2)

/* Table-entry builders: MMX_OP2 pairs the mmx and xmm forms of an op;
   SSE_FOP lists the ps/pd/ss/sd variants of a float op.  */
#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2719 
2720 static const SSEFunc_0_epp sse_op_table1[256][4] = {
2721     /* 3DNow! extensions */
2722     [0x0e] = { SSE_DUMMY }, /* femms */
2723     [0x0f] = { SSE_DUMMY }, /* pf... */
2724     /* pure SSE operations */
2725     [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2726     [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2727     [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2728     [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2729     [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2730     [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2731     [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2732     [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2733 
2734     [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2735     [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2736     [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2737     [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2738     [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2739     [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2740     [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2741     [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2742     [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2743     [0x51] = SSE_FOP(sqrt),
2744     [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2745     [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2746     [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2747     [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2748     [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2749     [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2750     [0x58] = SSE_FOP(add),
2751     [0x59] = SSE_FOP(mul),
2752     [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2753                gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2754     [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2755     [0x5c] = SSE_FOP(sub),
2756     [0x5d] = SSE_FOP(min),
2757     [0x5e] = SSE_FOP(div),
2758     [0x5f] = SSE_FOP(max),
2759 
2760     [0xc2] = SSE_FOP(cmpeq),
2761     [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2762                (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2763 
2764     /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2765     [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2766     [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2767 
2768     /* MMX ops and their SSE extensions */
2769     [0x60] = MMX_OP2(punpcklbw),
2770     [0x61] = MMX_OP2(punpcklwd),
2771     [0x62] = MMX_OP2(punpckldq),
2772     [0x63] = MMX_OP2(packsswb),
2773     [0x64] = MMX_OP2(pcmpgtb),
2774     [0x65] = MMX_OP2(pcmpgtw),
2775     [0x66] = MMX_OP2(pcmpgtl),
2776     [0x67] = MMX_OP2(packuswb),
2777     [0x68] = MMX_OP2(punpckhbw),
2778     [0x69] = MMX_OP2(punpckhwd),
2779     [0x6a] = MMX_OP2(punpckhdq),
2780     [0x6b] = MMX_OP2(packssdw),
2781     [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2782     [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2783     [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2784     [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, , movqdu */
2785     [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2786                (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2787                (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2788                (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2789     [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2790     [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2791     [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2792     [0x74] = MMX_OP2(pcmpeqb),
2793     [0x75] = MMX_OP2(pcmpeqw),
2794     [0x76] = MMX_OP2(pcmpeql),
2795     [0x77] = { SSE_DUMMY }, /* emms */
2796     [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2797     [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2798     [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2799     [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2800     [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */
2801     [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2802     [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2803     [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2804     [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2805     [0xd1] = MMX_OP2(psrlw),
2806     [0xd2] = MMX_OP2(psrld),
2807     [0xd3] = MMX_OP2(psrlq),
2808     [0xd4] = MMX_OP2(paddq),
2809     [0xd5] = MMX_OP2(pmullw),
2810     [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2811     [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2812     [0xd8] = MMX_OP2(psubusb),
2813     [0xd9] = MMX_OP2(psubusw),
2814     [0xda] = MMX_OP2(pminub),
2815     [0xdb] = MMX_OP2(pand),
2816     [0xdc] = MMX_OP2(paddusb),
2817     [0xdd] = MMX_OP2(paddusw),
2818     [0xde] = MMX_OP2(pmaxub),
2819     [0xdf] = MMX_OP2(pandn),
2820     [0xe0] = MMX_OP2(pavgb),
2821     [0xe1] = MMX_OP2(psraw),
2822     [0xe2] = MMX_OP2(psrad),
2823     [0xe3] = MMX_OP2(pavgw),
2824     [0xe4] = MMX_OP2(pmulhuw),
2825     [0xe5] = MMX_OP2(pmulhw),
2826     [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2827     [0xe7] = { SSE_SPECIAL , SSE_SPECIAL },  /* movntq, movntq */
2828     [0xe8] = MMX_OP2(psubsb),
2829     [0xe9] = MMX_OP2(psubsw),
2830     [0xea] = MMX_OP2(pminsw),
2831     [0xeb] = MMX_OP2(por),
2832     [0xec] = MMX_OP2(paddsb),
2833     [0xed] = MMX_OP2(paddsw),
2834     [0xee] = MMX_OP2(pmaxsw),
2835     [0xef] = MMX_OP2(pxor),
2836     [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2837     [0xf1] = MMX_OP2(psllw),
2838     [0xf2] = MMX_OP2(pslld),
2839     [0xf3] = MMX_OP2(psllq),
2840     [0xf4] = MMX_OP2(pmuludq),
2841     [0xf5] = MMX_OP2(pmaddwd),
2842     [0xf6] = MMX_OP2(psadbw),
2843     [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2844                (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2845     [0xf8] = MMX_OP2(psubb),
2846     [0xf9] = MMX_OP2(psubw),
2847     [0xfa] = MMX_OP2(psubl),
2848     [0xfb] = MMX_OP2(psubq),
2849     [0xfc] = MMX_OP2(paddb),
2850     [0xfd] = MMX_OP2(paddw),
2851     [0xfe] = MMX_OP2(paddl),
2852 };
2853 
/* Shift-by-immediate group ops (opcodes 0x71/0x72/0x73, ModRM reg = /2../7).
   Row index is group * 8 + reg, where group 0 = word (0x71), 1 = dword
   (0x72), 2 = qword (0x73); column b1 selects the MMX (0) or SSE/xmm (1)
   helper.  Unlisted slots stay NULL and the caller rejects them.  */
static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
    [0 + 2] = MMX_OP2(psrlw),   /* /2: shift right logical */
    [0 + 4] = MMX_OP2(psraw),   /* /4: shift right arithmetic */
    [0 + 6] = MMX_OP2(psllw),   /* /6: shift left logical */
    [8 + 2] = MMX_OP2(psrld),
    [8 + 4] = MMX_OP2(psrad),
    [8 + 6] = MMX_OP2(pslld),
    [16 + 2] = MMX_OP2(psrlq),
    [16 + 3] = { NULL, gen_helper_psrldq_xmm }, /* /3: whole-byte shift, SSE only */
    [16 + 6] = MMX_OP2(psllq),
    [16 + 7] = { NULL, gen_helper_pslldq_xmm }, /* /7: whole-byte shift, SSE only */
};
2866 
/* cvtsi2ss/cvtsi2sd with a 32-bit integer source.  Index 0 is the
   REPZ-prefixed form (0x22a, cvtsi2ss), 1 the REPNZ form (0x32a,
   cvtsi2sd).  */
static const SSEFunc_0_epi sse_op_table3ai[] = {
    gen_helper_cvtsi2ss,
    gen_helper_cvtsi2sd
};
2871 
#ifdef TARGET_X86_64
/* As sse_op_table3ai, but with a 64-bit integer source; selected when
   the operand size is MO_64, so only meaningful on x86-64.  */
static const SSEFunc_0_epl sse_op_table3aq[] = {
    gen_helper_cvtsq2ss,
    gen_helper_cvtsq2sd
};
#endif
2878 
/* Scalar float -> 32-bit integer conversions, in the fixed order
   cvttss2si, cvtss2si, cvttsd2si, cvtsd2si ('tt' = truncating).
   NOTE(review): the index computation lives in gen_sse (0x22c..0x32d
   handling); assumed to follow this ordering — confirm at the call site.  */
static const SSEFunc_i_ep sse_op_table3bi[] = {
    gen_helper_cvttss2si,
    gen_helper_cvtss2si,
    gen_helper_cvttsd2si,
    gen_helper_cvtsd2si
};
2885 
#ifdef TARGET_X86_64
/* As sse_op_table3bi, but producing a 64-bit integer result
   (x86-64 only).  Same ordering as sse_op_table3bi.  */
static const SSEFunc_l_ep sse_op_table3bq[] = {
    gen_helper_cvttss2sq,
    gen_helper_cvtss2sq,
    gen_helper_cvttsd2sq,
    gen_helper_cvtsd2sq
};
#endif
2894 
/* Comparison helpers for the cmpps/cmppd/cmpss/cmpsd family (opcode
   0xc2).  Row order follows the predicate values: eq, lt, le, unord,
   neq, nlt, nle, ord; each SSE_FOP row supplies the four operand-size
   variants.  NOTE(review): presumably indexed by the instruction's
   immediate byte in gen_sse — dispatch is outside this table.  */
static const SSEFunc_0_epp sse_op_table4[8][4] = {
    SSE_FOP(cmpeq),
    SSE_FOP(cmplt),
    SSE_FOP(cmple),
    SSE_FOP(cmpunord),
    SSE_FOP(cmpneq),
    SSE_FOP(cmpnlt),
    SSE_FOP(cmpnle),
    SSE_FOP(cmpord),
};
2905 
/* 3DNow! helper table, apparently keyed by the instruction's 8-bit
   suffix opcode byte (dispatch is outside this table).  Unlisted
   entries are NULL and decode as invalid.  */
static const SSEFunc_0_epp sse_op_table5[256] = {
    [0x0c] = gen_helper_pi2fw,
    [0x0d] = gen_helper_pi2fd,
    [0x1c] = gen_helper_pf2iw,
    [0x1d] = gen_helper_pf2id,
    [0x8a] = gen_helper_pfnacc,
    [0x8e] = gen_helper_pfpnacc,
    [0x90] = gen_helper_pfcmpge,
    [0x94] = gen_helper_pfmin,
    [0x96] = gen_helper_pfrcp,
    [0x97] = gen_helper_pfrsqrt,
    [0x9a] = gen_helper_pfsub,
    [0x9e] = gen_helper_pfadd,
    [0xa0] = gen_helper_pfcmpgt,
    [0xa4] = gen_helper_pfmax,
    [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
    [0xa7] = gen_helper_movq, /* pfrsqit1 */
    [0xaa] = gen_helper_pfsubr,
    [0xae] = gen_helper_pfacc,
    [0xb0] = gen_helper_pfcmpeq,
    [0xb4] = gen_helper_pfmul,
    [0xb6] = gen_helper_movq, /* pfrcpit2 */
    [0xb7] = gen_helper_pmulhrw_mmx,
    [0xbb] = gen_helper_pswapd,
    [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
};
2932 
/* Table entry pairing an op's helper functions with the CPUID feature
   bit required for it to decode (see sse_op_table6 below).  */
struct SSEOpHelper_epp {
    SSEFunc_0_epp op[2];   /* [0] = MMX form (may be NULL), [1] = SSE/xmm form */
    uint32_t ext_mask;     /* required CPUID_EXT_* feature flag */
};
2937 
/* As SSEOpHelper_epp, but for helpers that also take an 8-bit
   immediate operand (see sse_op_table7 below).  */
struct SSEOpHelper_eppi {
    SSEFunc_0_eppi op[2];  /* [0] = MMX form (may be NULL), [1] = SSE/xmm form */
    uint32_t ext_mask;     /* required CPUID_EXT_* feature flag */
};
2942 
/* Builders for SSEOpHelper_epp/_eppi table entries: each expands to
   { helper pair, required CPUID feature bit }.  The SSE4.1+ ops have
   no MMX form, hence the NULL first slot; SSE_SPECIAL entries are
   dispatched by hand inside gen_sse().  */
#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
        CPUID_EXT_PCLMULQDQ }
#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2950 
/* 0F 38 xx three-byte opcodes (SSSE3/SSE4.1/SSE4.2/AESNI), keyed by
   the final opcode byte.  Each entry carries the MMX/SSE helper pair
   plus the CPUID feature bit that must be present.  Unlisted entries
   decode as invalid.  */
static const struct SSEOpHelper_epp sse_op_table6[256] = {
    [0x00] = SSSE3_OP(pshufb),
    [0x01] = SSSE3_OP(phaddw),
    [0x02] = SSSE3_OP(phaddd),
    [0x03] = SSSE3_OP(phaddsw),
    [0x04] = SSSE3_OP(pmaddubsw),
    [0x05] = SSSE3_OP(phsubw),
    [0x06] = SSSE3_OP(phsubd),
    [0x07] = SSSE3_OP(phsubsw),
    [0x08] = SSSE3_OP(psignb),
    [0x09] = SSSE3_OP(psignw),
    [0x0a] = SSSE3_OP(psignd),
    [0x0b] = SSSE3_OP(pmulhrsw),
    [0x10] = SSE41_OP(pblendvb),
    [0x14] = SSE41_OP(blendvps),
    [0x15] = SSE41_OP(blendvpd),
    [0x17] = SSE41_OP(ptest),
    [0x1c] = SSSE3_OP(pabsb),
    [0x1d] = SSSE3_OP(pabsw),
    [0x1e] = SSSE3_OP(pabsd),
    [0x20] = SSE41_OP(pmovsxbw),
    [0x21] = SSE41_OP(pmovsxbd),
    [0x22] = SSE41_OP(pmovsxbq),
    [0x23] = SSE41_OP(pmovsxwd),
    [0x24] = SSE41_OP(pmovsxwq),
    [0x25] = SSE41_OP(pmovsxdq),
    [0x28] = SSE41_OP(pmuldq),
    [0x29] = SSE41_OP(pcmpeqq),
    [0x2a] = SSE41_SPECIAL, /* movntdqa */
    [0x2b] = SSE41_OP(packusdw),
    [0x30] = SSE41_OP(pmovzxbw),
    [0x31] = SSE41_OP(pmovzxbd),
    [0x32] = SSE41_OP(pmovzxbq),
    [0x33] = SSE41_OP(pmovzxwd),
    [0x34] = SSE41_OP(pmovzxwq),
    [0x35] = SSE41_OP(pmovzxdq),
    [0x37] = SSE42_OP(pcmpgtq),
    [0x38] = SSE41_OP(pminsb),
    [0x39] = SSE41_OP(pminsd),
    [0x3a] = SSE41_OP(pminuw),
    [0x3b] = SSE41_OP(pminud),
    [0x3c] = SSE41_OP(pmaxsb),
    [0x3d] = SSE41_OP(pmaxsd),
    [0x3e] = SSE41_OP(pmaxuw),
    [0x3f] = SSE41_OP(pmaxud),
    [0x40] = SSE41_OP(pmulld),
    [0x41] = SSE41_OP(phminposuw),
    [0xdb] = AESNI_OP(aesimc),
    [0xdc] = AESNI_OP(aesenc),
    [0xdd] = AESNI_OP(aesenclast),
    [0xde] = AESNI_OP(aesdec),
    [0xdf] = AESNI_OP(aesdeclast),
};
3004 
/* 0F 3A xx three-byte opcodes, keyed by the final opcode byte.  Same
   layout as sse_op_table6, but these instructions take an extra 8-bit
   immediate, so the helpers are the _eppi variants.  */
static const struct SSEOpHelper_eppi sse_op_table7[256] = {
    [0x08] = SSE41_OP(roundps),
    [0x09] = SSE41_OP(roundpd),
    [0x0a] = SSE41_OP(roundss),
    [0x0b] = SSE41_OP(roundsd),
    [0x0c] = SSE41_OP(blendps),
    [0x0d] = SSE41_OP(blendpd),
    [0x0e] = SSE41_OP(pblendw),
    [0x0f] = SSSE3_OP(palignr),
    [0x14] = SSE41_SPECIAL, /* pextrb */
    [0x15] = SSE41_SPECIAL, /* pextrw */
    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
    [0x17] = SSE41_SPECIAL, /* extractps */
    [0x20] = SSE41_SPECIAL, /* pinsrb */
    [0x21] = SSE41_SPECIAL, /* insertps */
    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
    [0x40] = SSE41_OP(dpps),
    [0x41] = SSE41_OP(dppd),
    [0x42] = SSE41_OP(mpsadbw),
    [0x44] = PCLMULQDQ_OP(pclmulqdq),
    [0x60] = SSE42_OP(pcmpestrm),
    [0x61] = SSE42_OP(pcmpestri),
    [0x62] = SSE42_OP(pcmpistrm),
    [0x63] = SSE42_OP(pcmpistri),
    [0xdf] = AESNI_OP(aeskeygenassist),
};
3031 
3032 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3033                     target_ulong pc_start, int rex_r)
3034 {
3035     int b1, op1_offset, op2_offset, is_xmm, val;
3036     int modrm, mod, rm, reg;
3037     SSEFunc_0_epp sse_fn_epp;
3038     SSEFunc_0_eppi sse_fn_eppi;
3039     SSEFunc_0_ppi sse_fn_ppi;
3040     SSEFunc_0_eppt sse_fn_eppt;
3041     MemOp ot;
3042 
3043     b &= 0xff;
3044     if (s->prefix & PREFIX_DATA)
3045         b1 = 1;
3046     else if (s->prefix & PREFIX_REPZ)
3047         b1 = 2;
3048     else if (s->prefix & PREFIX_REPNZ)
3049         b1 = 3;
3050     else
3051         b1 = 0;
3052     sse_fn_epp = sse_op_table1[b][b1];
3053     if (!sse_fn_epp) {
3054         goto unknown_op;
3055     }
3056     if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3057         is_xmm = 1;
3058     } else {
3059         if (b1 == 0) {
3060             /* MMX case */
3061             is_xmm = 0;
3062         } else {
3063             is_xmm = 1;
3064         }
3065     }
3066     /* simple MMX/SSE operation */
3067     if (s->flags & HF_TS_MASK) {
3068         gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3069         return;
3070     }
3071     if (s->flags & HF_EM_MASK) {
3072     illegal_op:
3073         gen_illegal_opcode(s);
3074         return;
3075     }
3076     if (is_xmm
3077         && !(s->flags & HF_OSFXSR_MASK)
3078         && (b != 0x38 && b != 0x3a)) {
3079         goto unknown_op;
3080     }
3081     if (b == 0x0e) {
3082         if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3083             /* If we were fully decoding this we might use illegal_op.  */
3084             goto unknown_op;
3085         }
3086         /* femms */
3087         gen_helper_emms(cpu_env);
3088         return;
3089     }
3090     if (b == 0x77) {
3091         /* emms */
3092         gen_helper_emms(cpu_env);
3093         return;
3094     }
3095     /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3096        the static cpu state) */
3097     if (!is_xmm) {
3098         gen_helper_enter_mmx(cpu_env);
3099     }
3100 
3101     modrm = x86_ldub_code(env, s);
3102     reg = ((modrm >> 3) & 7);
3103     if (is_xmm)
3104         reg |= rex_r;
3105     mod = (modrm >> 6) & 3;
3106     if (sse_fn_epp == SSE_SPECIAL) {
3107         b |= (b1 << 8);
3108         switch(b) {
3109         case 0x0e7: /* movntq */
3110             if (mod == 3) {
3111                 goto illegal_op;
3112             }
3113             gen_lea_modrm(env, s, modrm);
3114             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3115             break;
3116         case 0x1e7: /* movntdq */
3117         case 0x02b: /* movntps */
3118         case 0x12b: /* movntps */
3119             if (mod == 3)
3120                 goto illegal_op;
3121             gen_lea_modrm(env, s, modrm);
3122             gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3123             break;
3124         case 0x3f0: /* lddqu */
3125             if (mod == 3)
3126                 goto illegal_op;
3127             gen_lea_modrm(env, s, modrm);
3128             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3129             break;
3130         case 0x22b: /* movntss */
3131         case 0x32b: /* movntsd */
3132             if (mod == 3)
3133                 goto illegal_op;
3134             gen_lea_modrm(env, s, modrm);
3135             if (b1 & 1) {
3136                 gen_stq_env_A0(s, offsetof(CPUX86State,
3137                                            xmm_regs[reg].ZMM_Q(0)));
3138             } else {
3139                 tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3140                     xmm_regs[reg].ZMM_L(0)));
3141                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3142             }
3143             break;
3144         case 0x6e: /* movd mm, ea */
3145 #ifdef TARGET_X86_64
3146             if (s->dflag == MO_64) {
3147                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3148                 tcg_gen_st_tl(s->T0, cpu_env,
3149                               offsetof(CPUX86State, fpregs[reg].mmx));
3150             } else
3151 #endif
3152             {
3153                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3154                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3155                                  offsetof(CPUX86State,fpregs[reg].mmx));
3156                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3157                 gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3158             }
3159             break;
3160         case 0x16e: /* movd xmm, ea */
3161 #ifdef TARGET_X86_64
3162             if (s->dflag == MO_64) {
3163                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3164                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3165                                  offsetof(CPUX86State,xmm_regs[reg]));
3166                 gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3167             } else
3168 #endif
3169             {
3170                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3171                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3172                                  offsetof(CPUX86State,xmm_regs[reg]));
3173                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3174                 gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3175             }
3176             break;
3177         case 0x6f: /* movq mm, ea */
3178             if (mod != 3) {
3179                 gen_lea_modrm(env, s, modrm);
3180                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3181             } else {
3182                 rm = (modrm & 7);
3183                 tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3184                                offsetof(CPUX86State,fpregs[rm].mmx));
3185                 tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3186                                offsetof(CPUX86State,fpregs[reg].mmx));
3187             }
3188             break;
3189         case 0x010: /* movups */
3190         case 0x110: /* movupd */
3191         case 0x028: /* movaps */
3192         case 0x128: /* movapd */
3193         case 0x16f: /* movdqa xmm, ea */
3194         case 0x26f: /* movdqu xmm, ea */
3195             if (mod != 3) {
3196                 gen_lea_modrm(env, s, modrm);
3197                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3198             } else {
3199                 rm = (modrm & 7) | REX_B(s);
3200                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
3201                             offsetof(CPUX86State,xmm_regs[rm]));
3202             }
3203             break;
3204         case 0x210: /* movss xmm, ea */
3205             if (mod != 3) {
3206                 gen_lea_modrm(env, s, modrm);
3207                 gen_op_ld_v(s, MO_32, s->T0, s->A0);
3208                 tcg_gen_st32_tl(s->T0, cpu_env,
3209                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3210                 tcg_gen_movi_tl(s->T0, 0);
3211                 tcg_gen_st32_tl(s->T0, cpu_env,
3212                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3213                 tcg_gen_st32_tl(s->T0, cpu_env,
3214                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3215                 tcg_gen_st32_tl(s->T0, cpu_env,
3216                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3217             } else {
3218                 rm = (modrm & 7) | REX_B(s);
3219                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3220                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3221             }
3222             break;
3223         case 0x310: /* movsd xmm, ea */
3224             if (mod != 3) {
3225                 gen_lea_modrm(env, s, modrm);
3226                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3227                                            xmm_regs[reg].ZMM_Q(0)));
3228                 tcg_gen_movi_tl(s->T0, 0);
3229                 tcg_gen_st32_tl(s->T0, cpu_env,
3230                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3231                 tcg_gen_st32_tl(s->T0, cpu_env,
3232                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3233             } else {
3234                 rm = (modrm & 7) | REX_B(s);
3235                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3236                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3237             }
3238             break;
3239         case 0x012: /* movlps */
3240         case 0x112: /* movlpd */
3241             if (mod != 3) {
3242                 gen_lea_modrm(env, s, modrm);
3243                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3244                                            xmm_regs[reg].ZMM_Q(0)));
3245             } else {
3246                 /* movhlps */
3247                 rm = (modrm & 7) | REX_B(s);
3248                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3249                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3250             }
3251             break;
3252         case 0x212: /* movsldup */
3253             if (mod != 3) {
3254                 gen_lea_modrm(env, s, modrm);
3255                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3256             } else {
3257                 rm = (modrm & 7) | REX_B(s);
3258                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3259                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3260                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3261                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3262             }
3263             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3264                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3265             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3266                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3267             break;
3268         case 0x312: /* movddup */
3269             if (mod != 3) {
3270                 gen_lea_modrm(env, s, modrm);
3271                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3272                                            xmm_regs[reg].ZMM_Q(0)));
3273             } else {
3274                 rm = (modrm & 7) | REX_B(s);
3275                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3276                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3277             }
3278             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3279                         offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3280             break;
3281         case 0x016: /* movhps */
3282         case 0x116: /* movhpd */
3283             if (mod != 3) {
3284                 gen_lea_modrm(env, s, modrm);
3285                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3286                                            xmm_regs[reg].ZMM_Q(1)));
3287             } else {
3288                 /* movlhps */
3289                 rm = (modrm & 7) | REX_B(s);
3290                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3291                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3292             }
3293             break;
3294         case 0x216: /* movshdup */
3295             if (mod != 3) {
3296                 gen_lea_modrm(env, s, modrm);
3297                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3298             } else {
3299                 rm = (modrm & 7) | REX_B(s);
3300                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3301                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3302                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3303                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3304             }
3305             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3306                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3307             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3308                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3309             break;
3310         case 0x178:
3311         case 0x378:
3312             {
3313                 int bit_index, field_length;
3314 
3315                 if (b1 == 1 && reg != 0)
3316                     goto illegal_op;
3317                 field_length = x86_ldub_code(env, s) & 0x3F;
3318                 bit_index = x86_ldub_code(env, s) & 0x3F;
3319                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3320                     offsetof(CPUX86State,xmm_regs[reg]));
3321                 if (b1 == 1)
3322                     gen_helper_extrq_i(cpu_env, s->ptr0,
3323                                        tcg_const_i32(bit_index),
3324                                        tcg_const_i32(field_length));
3325                 else
3326                     gen_helper_insertq_i(cpu_env, s->ptr0,
3327                                          tcg_const_i32(bit_index),
3328                                          tcg_const_i32(field_length));
3329             }
3330             break;
3331         case 0x7e: /* movd ea, mm */
3332 #ifdef TARGET_X86_64
3333             if (s->dflag == MO_64) {
3334                 tcg_gen_ld_i64(s->T0, cpu_env,
3335                                offsetof(CPUX86State,fpregs[reg].mmx));
3336                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3337             } else
3338 #endif
3339             {
3340                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3341                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3342                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3343             }
3344             break;
3345         case 0x17e: /* movd ea, xmm */
3346 #ifdef TARGET_X86_64
3347             if (s->dflag == MO_64) {
3348                 tcg_gen_ld_i64(s->T0, cpu_env,
3349                                offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3350                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3351             } else
3352 #endif
3353             {
3354                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3355                                  offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3356                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3357             }
3358             break;
3359         case 0x27e: /* movq xmm, ea */
3360             if (mod != 3) {
3361                 gen_lea_modrm(env, s, modrm);
3362                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3363                                            xmm_regs[reg].ZMM_Q(0)));
3364             } else {
3365                 rm = (modrm & 7) | REX_B(s);
3366                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3367                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3368             }
3369             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3370             break;
3371         case 0x7f: /* movq ea, mm */
3372             if (mod != 3) {
3373                 gen_lea_modrm(env, s, modrm);
3374                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3375             } else {
3376                 rm = (modrm & 7);
3377                 gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
3378                             offsetof(CPUX86State,fpregs[reg].mmx));
3379             }
3380             break;
3381         case 0x011: /* movups */
3382         case 0x111: /* movupd */
3383         case 0x029: /* movaps */
3384         case 0x129: /* movapd */
3385         case 0x17f: /* movdqa ea, xmm */
3386         case 0x27f: /* movdqu ea, xmm */
3387             if (mod != 3) {
3388                 gen_lea_modrm(env, s, modrm);
3389                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3390             } else {
3391                 rm = (modrm & 7) | REX_B(s);
3392                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
3393                             offsetof(CPUX86State,xmm_regs[reg]));
3394             }
3395             break;
3396         case 0x211: /* movss ea, xmm */
3397             if (mod != 3) {
3398                 gen_lea_modrm(env, s, modrm);
3399                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3400                                  offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3401                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3402             } else {
3403                 rm = (modrm & 7) | REX_B(s);
3404                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
3405                             offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3406             }
3407             break;
3408         case 0x311: /* movsd ea, xmm */
3409             if (mod != 3) {
3410                 gen_lea_modrm(env, s, modrm);
3411                 gen_stq_env_A0(s, offsetof(CPUX86State,
3412                                            xmm_regs[reg].ZMM_Q(0)));
3413             } else {
3414                 rm = (modrm & 7) | REX_B(s);
3415                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3416                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3417             }
3418             break;
3419         case 0x013: /* movlps */
3420         case 0x113: /* movlpd */
3421             if (mod != 3) {
3422                 gen_lea_modrm(env, s, modrm);
3423                 gen_stq_env_A0(s, offsetof(CPUX86State,
3424                                            xmm_regs[reg].ZMM_Q(0)));
3425             } else {
3426                 goto illegal_op;
3427             }
3428             break;
3429         case 0x017: /* movhps */
3430         case 0x117: /* movhpd */
3431             if (mod != 3) {
3432                 gen_lea_modrm(env, s, modrm);
3433                 gen_stq_env_A0(s, offsetof(CPUX86State,
3434                                            xmm_regs[reg].ZMM_Q(1)));
3435             } else {
3436                 goto illegal_op;
3437             }
3438             break;
3439         case 0x71: /* shift mm, im */
3440         case 0x72:
3441         case 0x73:
3442         case 0x171: /* shift xmm, im */
3443         case 0x172:
3444         case 0x173:
3445             if (b1 >= 2) {
3446                 goto unknown_op;
3447             }
3448             val = x86_ldub_code(env, s);
3449             if (is_xmm) {
3450                 tcg_gen_movi_tl(s->T0, val);
3451                 tcg_gen_st32_tl(s->T0, cpu_env,
3452                                 offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3453                 tcg_gen_movi_tl(s->T0, 0);
3454                 tcg_gen_st32_tl(s->T0, cpu_env,
3455                                 offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
3456                 op1_offset = offsetof(CPUX86State,xmm_t0);
3457             } else {
3458                 tcg_gen_movi_tl(s->T0, val);
3459                 tcg_gen_st32_tl(s->T0, cpu_env,
3460                                 offsetof(CPUX86State, mmx_t0.MMX_L(0)));
3461                 tcg_gen_movi_tl(s->T0, 0);
3462                 tcg_gen_st32_tl(s->T0, cpu_env,
3463                                 offsetof(CPUX86State, mmx_t0.MMX_L(1)));
3464                 op1_offset = offsetof(CPUX86State,mmx_t0);
3465             }
3466             sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3467                                        (((modrm >> 3)) & 7)][b1];
3468             if (!sse_fn_epp) {
3469                 goto unknown_op;
3470             }
3471             if (is_xmm) {
3472                 rm = (modrm & 7) | REX_B(s);
3473                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3474             } else {
3475                 rm = (modrm & 7);
3476                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3477             }
3478             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3479             tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
3480             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3481             break;
3482         case 0x050: /* movmskps */
3483             rm = (modrm & 7) | REX_B(s);
3484             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3485                              offsetof(CPUX86State,xmm_regs[rm]));
3486             gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
3487             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3488             break;
3489         case 0x150: /* movmskpd */
3490             rm = (modrm & 7) | REX_B(s);
3491             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3492                              offsetof(CPUX86State,xmm_regs[rm]));
3493             gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
3494             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3495             break;
3496         case 0x02a: /* cvtpi2ps */
3497         case 0x12a: /* cvtpi2pd */
3498             gen_helper_enter_mmx(cpu_env);
3499             if (mod != 3) {
3500                 gen_lea_modrm(env, s, modrm);
3501                 op2_offset = offsetof(CPUX86State,mmx_t0);
3502                 gen_ldq_env_A0(s, op2_offset);
3503             } else {
3504                 rm = (modrm & 7);
3505                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3506             }
3507             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3508             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3509             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3510             switch(b >> 8) {
3511             case 0x0:
3512                 gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
3513                 break;
3514             default:
3515             case 0x1:
3516                 gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
3517                 break;
3518             }
3519             break;
3520         case 0x22a: /* cvtsi2ss */
3521         case 0x32a: /* cvtsi2sd */
3522             ot = mo_64_32(s->dflag);
3523             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3524             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3525             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3526             if (ot == MO_32) {
3527                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3528                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3529                 sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
3530             } else {
3531 #ifdef TARGET_X86_64
3532                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3533                 sse_fn_epl(cpu_env, s->ptr0, s->T0);
3534 #else
3535                 goto illegal_op;
3536 #endif
3537             }
3538             break;
        case 0x02c: /* cvttps2pi */
        case 0x12c: /* cvttpd2pi */
        case 0x02d: /* cvtps2pi */
        case 0x12d: /* cvtpd2pi */
            /* Convert packed floats/doubles from an XMM register or m128
               into two packed 32-bit integers in an MMX register; the
               0x2c forms truncate, the 0x2d forms round.  The destination
               is MMX state, so enter MMX mode first.  */
            gen_helper_enter_mmx(cpu_env);
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                /* Memory operand: stage the full 128-bit source.  */
                op2_offset = offsetof(CPUX86State,xmm_t0);
                gen_ldo_env_A0(s, op2_offset);
            } else {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            }
            /* MMX destination: only fpregs 0-7 exist, mask off REX.R.  */
            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
            switch(b) {
            case 0x02c:
                gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
                break;
            case 0x12c:
                gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
                break;
            case 0x02d:
                gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
                break;
            case 0x12d:
                gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
                break;
            }
            break;
        case 0x22c: /* cvttss2si */
        case 0x32c: /* cvttsd2si */
        case 0x22d: /* cvtss2si */
        case 0x32d: /* cvtsd2si */
            /* Convert a scalar float/double from an XMM register or
               memory into a 32/64-bit GPR; 0x2c forms truncate, 0x2d
               forms round.  */
            ot = mo_64_32(s->dflag);
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                /* Stage the scalar source in xmm_t0: 64 bits for the sd
                   forms (b >> 8 odd, i.e. f2 prefix), 32 bits for ss.  */
                if ((b >> 8) & 1) {
                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
                } else {
                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
                    tcg_gen_st32_tl(s->T0, cpu_env,
                                    offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
                }
                op2_offset = offsetof(CPUX86State,xmm_t0);
            } else {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            }
            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
            if (ot == MO_32) {
                /* Index bit 1 = sd vs ss (prefix), bit 0 = round vs
                   truncate (0x2d vs 0x2c).  */
                SSEFunc_i_ep sse_fn_i_ep =
                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
                sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
                tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
            } else {
#ifdef TARGET_X86_64
                SSEFunc_l_ep sse_fn_l_ep =
                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
                sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
#else
                /* MO_64 operand size cannot occur without TARGET_X86_64.  */
                goto illegal_op;
#endif
            }
            gen_op_mov_reg_v(s, ot, reg, s->T0);
            break;
        case 0xc4: /* pinsrw */
        case 0x1c4:
            /* Insert a 16-bit value from a GPR or m16 into the word slot
               of the MMX/XMM destination selected by the trailing imm8.  */
            s->rip_offset = 1;          /* one imm8 follows the operand */
            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
            val = x86_ldub_code(env, s);
            if (b1) {
                /* XMM destination: eight word slots.  */
                val &= 7;
                tcg_gen_st16_tl(s->T0, cpu_env,
                                offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
            } else {
                /* MMX destination: four word slots.  */
                val &= 3;
                tcg_gen_st16_tl(s->T0, cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
            }
            break;
        case 0xc5: /* pextrw */
        case 0x1c5:
            /* Extract the word selected by imm8 from an MMX/XMM register
               into a GPR, zero-extended.  Register source only.  */
            if (mod != 3)
                goto illegal_op;
            ot = mo_64_32(s->dflag);
            val = x86_ldub_code(env, s);
            if (b1) {
                /* XMM source: eight word slots, REX.B extends rm.  */
                val &= 7;
                rm = (modrm & 7) | REX_B(s);
                tcg_gen_ld16u_tl(s->T0, cpu_env,
                                 offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
            } else {
                /* MMX source: four word slots, no REX extension.  */
                val &= 3;
                rm = (modrm & 7);
                tcg_gen_ld16u_tl(s->T0, cpu_env,
                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
            }
            /* Destination GPR comes from the modrm reg field.  */
            reg = ((modrm >> 3) & 7) | rex_r;
            gen_op_mov_reg_v(s, ot, reg, s->T0);
            break;
        case 0x1d6: /* movq ea, xmm */
            /* Store the low quadword of an XMM register to memory, or to
               another XMM register (in which case the destination's high
               quadword is zeroed).  */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_stq_env_A0(s, offsetof(CPUX86State,
                                           xmm_regs[reg].ZMM_Q(0)));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
                gen_op_movq_env_0(s,
                                  offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
            }
            break;
        case 0x2d6: /* movq2dq */
            /* Move an MMX register into the low quadword of an XMM
               register, zeroing the high quadword.  Touches MMX state,
               so enter MMX mode.  */
            gen_helper_enter_mmx(cpu_env);
            rm = (modrm & 7);
            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                        offsetof(CPUX86State,fpregs[rm].mmx));
            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
            break;
        case 0x3d6: /* movdq2q */
            /* Move the low quadword of an XMM register into an MMX
               register (only fpregs 0-7; mask off REX.R).  */
            gen_helper_enter_mmx(cpu_env);
            rm = (modrm & 7) | REX_B(s);
            gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
                        offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
            break;
        case 0xd7: /* pmovmskb */
        case 0x1d7:
            /* Gather the top bit of each byte of an MMX/XMM register into
               a bitmask, written zero-extended to a GPR.  Register source
               only.  */
            if (mod != 3)
                goto illegal_op;
            if (b1) {
                rm = (modrm & 7) | REX_B(s);
                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                 offsetof(CPUX86State, xmm_regs[rm]));
                gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
            } else {
                rm = (modrm & 7);
                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                 offsetof(CPUX86State, fpregs[rm].mmx));
                gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
            }
            reg = ((modrm >> 3) & 7) | rex_r;
            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
            break;
3685 
        case 0x138:
        case 0x038:
            /* Three-byte opcodes 0f 38 xx: two-operand MMX/SSE ops
               dispatched through sse_op_table6, indexed by the third
               opcode byte and the prefix (b1).  */
            b = modrm;
            if ((b & 0xf0) == 0xf0) {
                /* 0f 38 f0..ff are the integer extensions (movbe, crc32,
                   BMI, adcx/adox, ...) handled by the 0x238/0x338 arm.  */
                goto do_0f_38_fx;
            }
            modrm = x86_ldub_code(env, s);
            rm = modrm & 7;
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;
            /* Only the no-prefix (MMX) and 0x66 (XMM) forms exist here.  */
            if (b1 >= 2) {
                goto unknown_op;
            }

            sse_fn_epp = sse_op_table6[b].op[b1];
            if (!sse_fn_epp) {
                goto unknown_op;
            }
            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
                goto illegal_op;

            if (b1) {
                /* XMM form.  */
                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                } else {
                    op2_offset = offsetof(CPUX86State,xmm_t0);
                    gen_lea_modrm(env, s, modrm);
                    /* The widening pmovsx/pmovzx forms read fewer than 16
                       bytes from memory; load only what the insn uses.  */
                    switch (b) {
                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
                        gen_ldq_env_A0(s, op2_offset +
                                        offsetof(ZMMReg, ZMM_Q(0)));
                        break;
                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                            s->mem_index, MO_LEUL);
                        tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
                                        offsetof(ZMMReg, ZMM_L(0)));
                        break;
                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
                        tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
                                           s->mem_index, MO_LEUW);
                        tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
                                        offsetof(ZMMReg, ZMM_W(0)));
                        break;
                    case 0x2a:            /* movntqda */
                        /* Pure (non-temporal) load into the destination
                           register itself; no helper to call, so finish
                           here.  */
                        gen_ldo_env_A0(s, op1_offset);
                        return;
                    default:
                        gen_ldo_env_A0(s, op2_offset);
                    }
                }
            } else {
                /* MMX form: stage a memory source in mmx_t0.  */
                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                } else {
                    op2_offset = offsetof(CPUX86State,mmx_t0);
                    gen_lea_modrm(env, s, modrm);
                    gen_ldq_env_A0(s, op2_offset);
                }
            }
            /* SSE_SPECIAL entries in table6 have no generic helper.  */
            if (sse_fn_epp == SSE_SPECIAL) {
                goto unknown_op;
            }

            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);

            /* 0f 38 17 is ptest, whose helper leaves its result in the
               flags; switch the lazy-flags state to EFLAGS.  */
            if (b == 0x17) {
                set_cc_op(s, CC_OP_EFLAGS);
            }
            break;
3763 
        case 0x238:
        case 0x338:
        do_0f_38_fx:
            /* Various integer extensions at 0f 38 f[0-f].  b becomes
               (prefix-group << 8) | third-opcode-byte, so e.g. 0x3f0 is
               the f2-prefixed 0f 38 f0.  */
            b = modrm | (b1 << 8);
            modrm = x86_ldub_code(env, s);
            reg = ((modrm >> 3) & 7) | rex_r;

            switch (b) {
            case 0x3f0: /* crc32 Gd,Eb */
            case 0x3f1: /* crc32 Gd,Ey */
            do_crc32:
                if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
                    goto illegal_op;
                }
                /* Source width: f0 form is always 8-bit; f1 follows the
                   operand-size (66 prefix / REX.W) rules.  */
                if ((b & 0xff) == 0xf0) {
                    ot = MO_8;
                } else if (s->dflag != MO_64) {
                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
                } else {
                    ot = MO_64;
                }

                /* helper_crc32(accumulator, data, bit-width).  */
                tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                gen_helper_crc32(s->T0, s->tmp2_i32,
                                 s->T0, tcg_const_i32(8 << ot));

                /* The destination write-back uses the full operand size,
                   zero-extending the 32-bit CRC as needed.  */
                ot = mo_64_32(s->dflag);
                gen_op_mov_reg_v(s, ot, reg, s->T0);
                break;

            case 0x1f0: /* crc32 or movbe */
            case 0x1f1:
                /* For these insns, the f3 prefix is supposed to have priority
                   over the 66 prefix, but that's not what we implement above
                   setting b1.  */
                if (s->prefix & PREFIX_REPNZ) {
                    goto do_crc32;
                }
                /* FALLTHRU */
            case 0x0f0: /* movbe Gy,My */
            case 0x0f1: /* movbe My,Gy */
                /* Byte-swapping load/store between a GPR and memory.  */
                if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
                    goto illegal_op;
                }
                if (s->dflag != MO_64) {
                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
                } else {
                    ot = MO_64;
                }

                /* The byte swap is expressed as a big-endian memory
                   access (ot | MO_BE).  */
                gen_lea_modrm(env, s, modrm);
                if ((b & 1) == 0) {
                    tcg_gen_qemu_ld_tl(s->T0, s->A0,
                                       s->mem_index, ot | MO_BE);
                    gen_op_mov_reg_v(s, ot, reg, s->T0);
                } else {
                    tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
                                       s->mem_index, ot | MO_BE);
                }
                break;

            case 0x0f2: /* andn Gy, By, Ey */
                /* BMI1: dest = ~vvvv & src, setting flags like a logic
                   op.  Requires a VEX encoding with VEX.L clear.  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
                gen_op_mov_reg_v(s, ot, reg, s->T0);
                gen_op_update1_cc(s);
                set_cc_op(s, CC_OP_LOGICB + ot);
                break;

            case 0x0f7: /* bextr Gy, Ey, By */
                /* BMI1 bit-field extract: By[7:0] = start, By[15:8] =
                   length.  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                {
                    TCGv bound, zero;

                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                    /* Extract START, and shift the operand.
                       Shifts larger than operand size get zeros.  */
                    tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
                    tcg_gen_shr_tl(s->T0, s->T0, s->A0);

                    bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                    zero = tcg_const_tl(0);
                    tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
                                       s->T0, zero);
                    tcg_temp_free(zero);

                    /* Extract the LEN into a mask.  Lengths larger than
                       operand size get all ones.  */
                    tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
                    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
                                       s->A0, bound);
                    tcg_temp_free(bound);
                    /* Mask the shifted operand with (1 << LEN) - 1.  */
                    tcg_gen_movi_tl(s->T1, 1);
                    tcg_gen_shl_tl(s->T1, s->T1, s->A0);
                    tcg_gen_subi_tl(s->T1, s->T1, 1);
                    tcg_gen_and_tl(s->T0, s->T0, s->T1);

                    gen_op_mov_reg_v(s, ot, reg, s->T0);
                    gen_op_update1_cc(s);
                    set_cc_op(s, CC_OP_LOGICB + ot);
                }
                break;

            case 0x0f5: /* bzhi Gy, Ey, By */
                /* BMI2: zero the bits of Ey at positions >= By[7:0].  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
                {
                    TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                    /* Note that since we're using BMILG (in order to get O
                       cleared) we need to store the inverse into C.  */
                    tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
                                       s->T1, bound);
                    /* Clamp the index so the shift below stays defined.  */
                    tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
                                       bound, bound, s->T1);
                    tcg_temp_free(bound);
                }
                tcg_gen_movi_tl(s->A0, -1);
                tcg_gen_shl_tl(s->A0, s->A0, s->T1);
                tcg_gen_andc_tl(s->T0, s->T0, s->A0);
                gen_op_mov_reg_v(s, ot, reg, s->T0);
                gen_op_update1_cc(s);
                set_cc_op(s, CC_OP_BMILGB + ot);
                break;

            case 0x3f6: /* mulx By, Gy, rdx, Ey */
                /* BMI2 flagless widening multiply: (Gy:By) = rdx * Ey.  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                switch (ot) {
                default:
                    /* 32-bit: low half to the VEX.vvvv register, high
                       half to the modrm reg.  */
                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                    tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
                    tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
                                      s->tmp2_i32, s->tmp3_i32);
                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
                    tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
                    break;
#ifdef TARGET_X86_64
                case MO_64:
                    tcg_gen_mulu2_i64(s->T0, s->T1,
                                      s->T0, cpu_regs[R_EDX]);
                    tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
                    tcg_gen_mov_i64(cpu_regs[reg], s->T1);
                    break;
#endif
                }
                break;

            case 0x3f5: /* pdep Gy, By, Ey */
                /* BMI2 parallel bit deposit.  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* Note that by zero-extending the source operand, we
                   automatically handle zero-extending the result.  */
                if (ot == MO_64) {
                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
                } else {
                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
                }
                gen_helper_pdep(cpu_regs[reg], s->T1, s->T0);
                break;

            case 0x2f5: /* pext Gy, By, Ey */
                /* BMI2 parallel bit extract.  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* Note that by zero-extending the source operand, we
                   automatically handle zero-extending the result.  */
                if (ot == MO_64) {
                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
                } else {
                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
                }
                gen_helper_pext(cpu_regs[reg], s->T1, s->T0);
                break;

            case 0x1f6: /* adcx Gy, Ey */
            case 0x2f6: /* adox Gy, Ey */
                /* ADX: add-with-carry through CF (adcx) or OF (adox)
                   only, leaving the other flags untouched.  The lazy
                   CC_OP_ADCX/ADOX/ADCOX states let back-to-back adcx/adox
                   chains reuse the carry without materializing EFLAGS.  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
                    goto illegal_op;
                } else {
                    TCGv carry_in, carry_out, zero;
                    int end_op;

                    ot = mo_64_32(s->dflag);
                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);

                    /* Re-use the carry-out from a previous round.  */
                    carry_in = NULL;
                    carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
                    switch (s->cc_op) {
                    case CC_OP_ADCX:
                        if (b == 0x1f6) {
                            carry_in = cpu_cc_dst;
                            end_op = CC_OP_ADCX;
                        } else {
                            end_op = CC_OP_ADCOX;
                        }
                        break;
                    case CC_OP_ADOX:
                        if (b == 0x1f6) {
                            end_op = CC_OP_ADCOX;
                        } else {
                            carry_in = cpu_cc_src2;
                            end_op = CC_OP_ADOX;
                        }
                        break;
                    case CC_OP_ADCOX:
                        end_op = CC_OP_ADCOX;
                        carry_in = carry_out;
                        break;
                    default:
                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
                        break;
                    }
                    /* If we can't reuse carry-out, get it out of EFLAGS.  */
                    if (!carry_in) {
                        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
                            gen_compute_eflags(s);
                        }
                        carry_in = s->tmp0;
                        tcg_gen_extract_tl(carry_in, cpu_cc_src,
                                           ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
                    }

                    switch (ot) {
#ifdef TARGET_X86_64
                    case MO_32:
                        /* If we know TL is 64-bit, and we want a 32-bit
                           result, just do everything in 64-bit arithmetic.  */
                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
                        tcg_gen_ext32u_i64(s->T0, s->T0);
                        tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
                        tcg_gen_add_i64(s->T0, s->T0, carry_in);
                        tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
                        tcg_gen_shri_i64(carry_out, s->T0, 32);
                        break;
#endif
                    default:
                        /* Otherwise compute the carry-out in two steps.  */
                        zero = tcg_const_tl(0);
                        tcg_gen_add2_tl(s->T0, carry_out,
                                        s->T0, zero,
                                        carry_in, zero);
                        tcg_gen_add2_tl(cpu_regs[reg], carry_out,
                                        cpu_regs[reg], carry_out,
                                        s->T0, zero);
                        tcg_temp_free(zero);
                        break;
                    }
                    set_cc_op(s, end_op);
                }
                break;

            case 0x1f7: /* shlx Gy, Ey, By */
            case 0x2f7: /* sarx Gy, Ey, By */
            case 0x3f7: /* shrx Gy, Ey, By */
                /* BMI2 flagless shifts; the count comes from VEX.vvvv and
                   is masked to the operand width like ordinary shifts.  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                if (ot == MO_64) {
                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
                } else {
                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
                }
                if (b == 0x1f7) {
                    tcg_gen_shl_tl(s->T0, s->T0, s->T1);
                } else if (b == 0x2f7) {
                    /* Arithmetic shift needs a sign-extended operand when
                       TL is wider than 32 bits.  */
                    if (ot != MO_64) {
                        tcg_gen_ext32s_tl(s->T0, s->T0);
                    }
                    tcg_gen_sar_tl(s->T0, s->T0, s->T1);
                } else {
                    /* Logical shift needs a zero-extended operand.  */
                    if (ot != MO_64) {
                        tcg_gen_ext32u_tl(s->T0, s->T0);
                    }
                    tcg_gen_shr_tl(s->T0, s->T0, s->T1);
                }
                gen_op_mov_reg_v(s, ot, reg, s->T0);
                break;

            case 0x0f3:
            case 0x1f3:
            case 0x2f3:
            case 0x3f3: /* Group 17 */
                /* BMI1 blsr/blsmsk/blsi, selected by the modrm reg
                   field; the result goes to the VEX.vvvv register.  */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);

                tcg_gen_mov_tl(cpu_cc_src, s->T0);
                switch (reg & 7) {
                case 1: /* blsr By,Ey */
                    /* Reset the lowest set bit: x & (x - 1).  */
                    tcg_gen_subi_tl(s->T1, s->T0, 1);
                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
                    break;
                case 2: /* blsmsk By,Ey */
                    /* Mask up to the lowest set bit: x ^ (x - 1).  */
                    tcg_gen_subi_tl(s->T1, s->T0, 1);
                    tcg_gen_xor_tl(s->T0, s->T0, s->T1);
                    break;
                case 3: /* blsi By, Ey */
                    /* Isolate the lowest set bit: x & -x.  */
                    tcg_gen_neg_tl(s->T1, s->T0);
                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
                    break;
                default:
                    goto unknown_op;
                }
                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
                set_cc_op(s, CC_OP_BMILGB + ot);
                break;

            default:
                goto unknown_op;
            }
            break;
4118 
        case 0x03a:
        case 0x13a:
            /* Three-byte opcodes 0f 3a xx: ops with a trailing imm8,
               dispatched through sse_op_table7.  */
            b = modrm;
            modrm = x86_ldub_code(env, s);
            rm = modrm & 7;
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;
            /* Only the no-prefix (MMX) and 0x66 (XMM) forms exist here.  */
            if (b1 >= 2) {
                goto unknown_op;
            }

            sse_fn_eppi = sse_op_table7[b].op[b1];
            if (!sse_fn_eppi) {
                goto unknown_op;
            }
            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
                goto illegal_op;

            /* One imm8 follows the operand (for RIP-relative fixup).  */
            s->rip_offset = 1;

            if (sse_fn_eppi == SSE_SPECIAL) {
                /* Insert/extract forms that move between XMM elements and
                   GPRs or memory; handled inline rather than by helper.  */
                ot = mo_64_32(s->dflag);
                rm = (modrm & 7) | REX_B(s);
                if (mod != 3)
                    gen_lea_modrm(env, s, modrm);
                reg = ((modrm >> 3) & 7) | rex_r;
                val = x86_ldub_code(env, s);
                switch (b) {
                case 0x14: /* pextrb */
                    tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_B(val & 15)));
                    if (mod == 3) {
                        gen_op_mov_reg_v(s, ot, rm, s->T0);
                    } else {
                        tcg_gen_qemu_st_tl(s->T0, s->A0,
                                           s->mem_index, MO_UB);
                    }
                    break;
                case 0x15: /* pextrw */
                    tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_W(val & 7)));
                    if (mod == 3) {
                        gen_op_mov_reg_v(s, ot, rm, s->T0);
                    } else {
                        tcg_gen_qemu_st_tl(s->T0, s->A0,
                                           s->mem_index, MO_LEUW);
                    }
                    break;
                case 0x16:
                    if (ot == MO_32) { /* pextrd */
                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(val & 3)));
                        if (mod == 3) {
                            tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
                        } else {
                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                                s->mem_index, MO_LEUL);
                        }
                    } else { /* pextrq */
#ifdef TARGET_X86_64
                        tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
                                        offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_Q(val & 1)));
                        if (mod == 3) {
                            tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
                        } else {
                            tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
                                                s->mem_index, MO_LEQ);
                        }
#else
                        goto illegal_op;
#endif
                    }
                    break;
                case 0x17: /* extractps */
                    tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_L(val & 3)));
                    if (mod == 3) {
                        gen_op_mov_reg_v(s, ot, rm, s->T0);
                    } else {
                        tcg_gen_qemu_st_tl(s->T0, s->A0,
                                           s->mem_index, MO_LEUL);
                    }
                    break;
                case 0x20: /* pinsrb */
                    if (mod == 3) {
                        gen_op_mov_v_reg(s, MO_32, s->T0, rm);
                    } else {
                        tcg_gen_qemu_ld_tl(s->T0, s->A0,
                                           s->mem_index, MO_UB);
                    }
                    tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_B(val & 15)));
                    break;
                case 0x21: /* insertps */
                    /* imm8 fields: [7:6] source element, [5:4] destination
                       element, [3:0] zero mask.  */
                    if (mod == 3) {
                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
                                        offsetof(CPUX86State,xmm_regs[rm]
                                                .ZMM_L((val >> 6) & 3)));
                    } else {
                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                            s->mem_index, MO_LEUL);
                    }
                    tcg_gen_st_i32(s->tmp2_i32, cpu_env,
                                    offsetof(CPUX86State,xmm_regs[reg]
                                            .ZMM_L((val >> 4) & 3)));
                    if ((val >> 0) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(0)));
                    if ((val >> 1) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(1)));
                    if ((val >> 2) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(2)));
                    if ((val >> 3) & 1)
                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                        cpu_env, offsetof(CPUX86State,
                                                xmm_regs[reg].ZMM_L(3)));
                    break;
4243                 case 0x22:
4244                     if (ot == MO_32) { /* pinsrd */
4245                         if (mod == 3) {
4246                             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
4247                         } else {
4248                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4249                                                 s->mem_index, MO_LEUL);
4250                         }
4251                         tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4252                                         offsetof(CPUX86State,
4253                                                 xmm_regs[reg].ZMM_L(val & 3)));
4254                     } else { /* pinsrq */
4255 #ifdef TARGET_X86_64
4256                         if (mod == 3) {
4257                             gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
4258                         } else {
4259                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
4260                                                 s->mem_index, MO_LEQ);
4261                         }
4262                         tcg_gen_st_i64(s->tmp1_i64, cpu_env,
4263                                         offsetof(CPUX86State,
4264                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4265 #else
4266                         goto illegal_op;
4267 #endif
4268                     }
4269                     break;
4270                 }
4271                 return;
4272             }
4273 
4274             if (b1) {
4275                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4276                 if (mod == 3) {
4277                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4278                 } else {
4279                     op2_offset = offsetof(CPUX86State,xmm_t0);
4280                     gen_lea_modrm(env, s, modrm);
4281                     gen_ldo_env_A0(s, op2_offset);
4282                 }
4283             } else {
4284                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4285                 if (mod == 3) {
4286                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4287                 } else {
4288                     op2_offset = offsetof(CPUX86State,mmx_t0);
4289                     gen_lea_modrm(env, s, modrm);
4290                     gen_ldq_env_A0(s, op2_offset);
4291                 }
4292             }
4293             val = x86_ldub_code(env, s);
4294 
4295             if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4296                 set_cc_op(s, CC_OP_EFLAGS);
4297 
4298                 if (s->dflag == MO_64) {
4299                     /* The helper must use entire 64-bit gp registers */
4300                     val |= 1 << 8;
4301                 }
4302             }
4303 
4304             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4305             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4306             sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
4307             break;
4308 
4309         case 0x33a:
4310             /* Various integer extensions at 0f 3a f[0-f].  */
4311             b = modrm | (b1 << 8);
4312             modrm = x86_ldub_code(env, s);
4313             reg = ((modrm >> 3) & 7) | rex_r;
4314 
4315             switch (b) {
4316             case 0x3f0: /* rorx Gy,Ey, Ib */
4317                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4318                     || !(s->prefix & PREFIX_VEX)
4319                     || s->vex_l != 0) {
4320                     goto illegal_op;
4321                 }
4322                 ot = mo_64_32(s->dflag);
4323                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4324                 b = x86_ldub_code(env, s);
4325                 if (ot == MO_64) {
4326                     tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
4327                 } else {
4328                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4329                     tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
4330                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
4331                 }
4332                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4333                 break;
4334 
4335             default:
4336                 goto unknown_op;
4337             }
4338             break;
4339 
4340         default:
4341         unknown_op:
4342             gen_unknown_opcode(env, s);
4343             return;
4344         }
4345     } else {
4346         /* generic MMX or SSE operation */
4347         switch(b) {
4348         case 0x70: /* pshufx insn */
4349         case 0xc6: /* pshufx insn */
4350         case 0xc2: /* compare insns */
4351             s->rip_offset = 1;
4352             break;
4353         default:
4354             break;
4355         }
4356         if (is_xmm) {
4357             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4358             if (mod != 3) {
4359                 int sz = 4;
4360 
4361                 gen_lea_modrm(env, s, modrm);
4362                 op2_offset = offsetof(CPUX86State,xmm_t0);
4363 
4364                 switch (b) {
4365                 case 0x50 ... 0x5a:
4366                 case 0x5c ... 0x5f:
4367                 case 0xc2:
4368                     /* Most sse scalar operations.  */
4369                     if (b1 == 2) {
4370                         sz = 2;
4371                     } else if (b1 == 3) {
4372                         sz = 3;
4373                     }
4374                     break;
4375 
4376                 case 0x2e:  /* ucomis[sd] */
4377                 case 0x2f:  /* comis[sd] */
4378                     if (b1 == 0) {
4379                         sz = 2;
4380                     } else {
4381                         sz = 3;
4382                     }
4383                     break;
4384                 }
4385 
4386                 switch (sz) {
4387                 case 2:
4388                     /* 32 bit access */
4389                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
4390                     tcg_gen_st32_tl(s->T0, cpu_env,
4391                                     offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4392                     break;
4393                 case 3:
4394                     /* 64 bit access */
4395                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4396                     break;
4397                 default:
4398                     /* 128 bit access */
4399                     gen_ldo_env_A0(s, op2_offset);
4400                     break;
4401                 }
4402             } else {
4403                 rm = (modrm & 7) | REX_B(s);
4404                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4405             }
4406         } else {
4407             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4408             if (mod != 3) {
4409                 gen_lea_modrm(env, s, modrm);
4410                 op2_offset = offsetof(CPUX86State,mmx_t0);
4411                 gen_ldq_env_A0(s, op2_offset);
4412             } else {
4413                 rm = (modrm & 7);
4414                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4415             }
4416         }
4417         switch(b) {
4418         case 0x0f: /* 3DNow! data insns */
4419             val = x86_ldub_code(env, s);
4420             sse_fn_epp = sse_op_table5[val];
4421             if (!sse_fn_epp) {
4422                 goto unknown_op;
4423             }
4424             if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4425                 goto illegal_op;
4426             }
4427             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4428             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4429             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4430             break;
4431         case 0x70: /* pshufx insn */
4432         case 0xc6: /* pshufx insn */
4433             val = x86_ldub_code(env, s);
4434             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4435             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4436             /* XXX: introduce a new table? */
4437             sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4438             sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
4439             break;
4440         case 0xc2:
4441             /* compare insns */
4442             val = x86_ldub_code(env, s);
4443             if (val >= 8)
4444                 goto unknown_op;
4445             sse_fn_epp = sse_op_table4[val][b1];
4446 
4447             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4448             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4449             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4450             break;
4451         case 0xf7:
4452             /* maskmov : we must prepare A0 */
4453             if (mod != 3)
4454                 goto illegal_op;
4455             tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4456             gen_extu(s->aflag, s->A0);
4457             gen_add_A0_ds_seg(s);
4458 
4459             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4460             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4461             /* XXX: introduce a new table? */
4462             sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4463             sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
4464             break;
4465         default:
4466             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4467             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4468             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4469             break;
4470         }
4471         if (b == 0x2e || b == 0x2f) {
4472             set_cc_op(s, CC_OP_EFLAGS);
4473         }
4474     }
4475 }
4476 
4477 /* convert one instruction. s->base.is_jmp is set if the translation must
4478    be stopped. Return the next pc value */
4479 static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4480 {
4481     CPUX86State *env = cpu->env_ptr;
4482     int b, prefixes;
4483     int shift;
4484     MemOp ot, aflag, dflag;
4485     int modrm, reg, rm, mod, op, opreg, val;
4486     target_ulong next_eip, tval;
4487     int rex_w, rex_r;
4488     target_ulong pc_start = s->base.pc_next;
4489 
4490     s->pc_start = s->pc = pc_start;
4491     s->override = -1;
4492 #ifdef TARGET_X86_64
4493     s->rex_x = 0;
4494     s->rex_b = 0;
4495     s->x86_64_hregs = false;
4496 #endif
4497     s->rip_offset = 0; /* for relative ip address */
4498     s->vex_l = 0;
4499     s->vex_v = 0;
4500     if (sigsetjmp(s->jmpbuf, 0) != 0) {
4501         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
4502         return s->pc;
4503     }
4504 
4505     prefixes = 0;
4506     rex_w = -1;
4507     rex_r = 0;
4508 
4509  next_byte:
4510     b = x86_ldub_code(env, s);
4511     /* Collect prefixes.  */
4512     switch (b) {
4513     case 0xf3:
4514         prefixes |= PREFIX_REPZ;
4515         goto next_byte;
4516     case 0xf2:
4517         prefixes |= PREFIX_REPNZ;
4518         goto next_byte;
4519     case 0xf0:
4520         prefixes |= PREFIX_LOCK;
4521         goto next_byte;
4522     case 0x2e:
4523         s->override = R_CS;
4524         goto next_byte;
4525     case 0x36:
4526         s->override = R_SS;
4527         goto next_byte;
4528     case 0x3e:
4529         s->override = R_DS;
4530         goto next_byte;
4531     case 0x26:
4532         s->override = R_ES;
4533         goto next_byte;
4534     case 0x64:
4535         s->override = R_FS;
4536         goto next_byte;
4537     case 0x65:
4538         s->override = R_GS;
4539         goto next_byte;
4540     case 0x66:
4541         prefixes |= PREFIX_DATA;
4542         goto next_byte;
4543     case 0x67:
4544         prefixes |= PREFIX_ADR;
4545         goto next_byte;
4546 #ifdef TARGET_X86_64
4547     case 0x40 ... 0x4f:
4548         if (CODE64(s)) {
4549             /* REX prefix */
4550             rex_w = (b >> 3) & 1;
4551             rex_r = (b & 0x4) << 1;
4552             s->rex_x = (b & 0x2) << 2;
4553             REX_B(s) = (b & 0x1) << 3;
4554             /* select uniform byte register addressing */
4555             s->x86_64_hregs = true;
4556             goto next_byte;
4557         }
4558         break;
4559 #endif
4560     case 0xc5: /* 2-byte VEX */
4561     case 0xc4: /* 3-byte VEX */
4562         /* VEX prefixes cannot be used except in 32-bit mode.
4563            Otherwise the instruction is LES or LDS.  */
4564         if (s->code32 && !s->vm86) {
4565             static const int pp_prefix[4] = {
4566                 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4567             };
4568             int vex3, vex2 = x86_ldub_code(env, s);
4569 
4570             if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4571                 /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4572                    otherwise the instruction is LES or LDS.  */
4573                 s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
4574                 break;
4575             }
4576 
4577             /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4578             if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4579                             | PREFIX_LOCK | PREFIX_DATA)) {
4580                 goto illegal_op;
4581             }
4582 #ifdef TARGET_X86_64
4583             if (s->x86_64_hregs) {
4584                 goto illegal_op;
4585             }
4586 #endif
4587             rex_r = (~vex2 >> 4) & 8;
4588             if (b == 0xc5) {
4589                 /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4590                 vex3 = vex2;
4591                 b = x86_ldub_code(env, s) | 0x100;
4592             } else {
4593                 /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4594 #ifdef TARGET_X86_64
4595                 s->rex_x = (~vex2 >> 3) & 8;
4596                 s->rex_b = (~vex2 >> 2) & 8;
4597 #endif
4598                 vex3 = x86_ldub_code(env, s);
4599                 rex_w = (vex3 >> 7) & 1;
4600                 switch (vex2 & 0x1f) {
4601                 case 0x01: /* Implied 0f leading opcode bytes.  */
4602                     b = x86_ldub_code(env, s) | 0x100;
4603                     break;
4604                 case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4605                     b = 0x138;
4606                     break;
4607                 case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4608                     b = 0x13a;
4609                     break;
4610                 default:   /* Reserved for future use.  */
4611                     goto unknown_op;
4612                 }
4613             }
4614             s->vex_v = (~vex3 >> 3) & 0xf;
4615             s->vex_l = (vex3 >> 2) & 1;
4616             prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4617         }
4618         break;
4619     }
4620 
4621     /* Post-process prefixes.  */
4622     if (CODE64(s)) {
4623         /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4624            data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4625            over 0x66 if both are present.  */
4626         dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4627         /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4628         aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4629     } else {
4630         /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4631         if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
4632             dflag = MO_32;
4633         } else {
4634             dflag = MO_16;
4635         }
4636         /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4637         if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
4638             aflag = MO_32;
4639         }  else {
4640             aflag = MO_16;
4641         }
4642     }
4643 
4644     s->prefix = prefixes;
4645     s->aflag = aflag;
4646     s->dflag = dflag;
4647 
4648     /* now check op code */
4649  reswitch:
4650     switch(b) {
4651     case 0x0f:
4652         /**************************/
4653         /* extended op code */
4654         b = x86_ldub_code(env, s) | 0x100;
4655         goto reswitch;
4656 
4657         /**************************/
4658         /* arith & logic */
4659     case 0x00 ... 0x05:
4660     case 0x08 ... 0x0d:
4661     case 0x10 ... 0x15:
4662     case 0x18 ... 0x1d:
4663     case 0x20 ... 0x25:
4664     case 0x28 ... 0x2d:
4665     case 0x30 ... 0x35:
4666     case 0x38 ... 0x3d:
4667         {
4668             int op, f, val;
4669             op = (b >> 3) & 7;
4670             f = (b >> 1) & 3;
4671 
4672             ot = mo_b_d(b, dflag);
4673 
4674             switch(f) {
4675             case 0: /* OP Ev, Gv */
4676                 modrm = x86_ldub_code(env, s);
4677                 reg = ((modrm >> 3) & 7) | rex_r;
4678                 mod = (modrm >> 6) & 3;
4679                 rm = (modrm & 7) | REX_B(s);
4680                 if (mod != 3) {
4681                     gen_lea_modrm(env, s, modrm);
4682                     opreg = OR_TMP0;
4683                 } else if (op == OP_XORL && rm == reg) {
4684                 xor_zero:
4685                     /* xor reg, reg optimisation */
4686                     set_cc_op(s, CC_OP_CLR);
4687                     tcg_gen_movi_tl(s->T0, 0);
4688                     gen_op_mov_reg_v(s, ot, reg, s->T0);
4689                     break;
4690                 } else {
4691                     opreg = rm;
4692                 }
4693                 gen_op_mov_v_reg(s, ot, s->T1, reg);
4694                 gen_op(s, op, ot, opreg);
4695                 break;
4696             case 1: /* OP Gv, Ev */
4697                 modrm = x86_ldub_code(env, s);
4698                 mod = (modrm >> 6) & 3;
4699                 reg = ((modrm >> 3) & 7) | rex_r;
4700                 rm = (modrm & 7) | REX_B(s);
4701                 if (mod != 3) {
4702                     gen_lea_modrm(env, s, modrm);
4703                     gen_op_ld_v(s, ot, s->T1, s->A0);
4704                 } else if (op == OP_XORL && rm == reg) {
4705                     goto xor_zero;
4706                 } else {
4707                     gen_op_mov_v_reg(s, ot, s->T1, rm);
4708                 }
4709                 gen_op(s, op, ot, reg);
4710                 break;
4711             case 2: /* OP A, Iv */
4712                 val = insn_get(env, s, ot);
4713                 tcg_gen_movi_tl(s->T1, val);
4714                 gen_op(s, op, ot, OR_EAX);
4715                 break;
4716             }
4717         }
4718         break;
4719 
4720     case 0x82:
4721         if (CODE64(s))
4722             goto illegal_op;
4723         /* fall through */
4724     case 0x80: /* GRP1 */
4725     case 0x81:
4726     case 0x83:
4727         {
4728             int val;
4729 
4730             ot = mo_b_d(b, dflag);
4731 
4732             modrm = x86_ldub_code(env, s);
4733             mod = (modrm >> 6) & 3;
4734             rm = (modrm & 7) | REX_B(s);
4735             op = (modrm >> 3) & 7;
4736 
4737             if (mod != 3) {
4738                 if (b == 0x83)
4739                     s->rip_offset = 1;
4740                 else
4741                     s->rip_offset = insn_const_size(ot);
4742                 gen_lea_modrm(env, s, modrm);
4743                 opreg = OR_TMP0;
4744             } else {
4745                 opreg = rm;
4746             }
4747 
4748             switch(b) {
4749             default:
4750             case 0x80:
4751             case 0x81:
4752             case 0x82:
4753                 val = insn_get(env, s, ot);
4754                 break;
4755             case 0x83:
4756                 val = (int8_t)insn_get(env, s, MO_8);
4757                 break;
4758             }
4759             tcg_gen_movi_tl(s->T1, val);
4760             gen_op(s, op, ot, opreg);
4761         }
4762         break;
4763 
4764         /**************************/
4765         /* inc, dec, and other misc arith */
4766     case 0x40 ... 0x47: /* inc Gv */
4767         ot = dflag;
4768         gen_inc(s, ot, OR_EAX + (b & 7), 1);
4769         break;
4770     case 0x48 ... 0x4f: /* dec Gv */
4771         ot = dflag;
4772         gen_inc(s, ot, OR_EAX + (b & 7), -1);
4773         break;
4774     case 0xf6: /* GRP3 */
4775     case 0xf7:
4776         ot = mo_b_d(b, dflag);
4777 
4778         modrm = x86_ldub_code(env, s);
4779         mod = (modrm >> 6) & 3;
4780         rm = (modrm & 7) | REX_B(s);
4781         op = (modrm >> 3) & 7;
4782         if (mod != 3) {
4783             if (op == 0) {
4784                 s->rip_offset = insn_const_size(ot);
4785             }
4786             gen_lea_modrm(env, s, modrm);
4787             /* For those below that handle locked memory, don't load here.  */
4788             if (!(s->prefix & PREFIX_LOCK)
4789                 || op != 2) {
4790                 gen_op_ld_v(s, ot, s->T0, s->A0);
4791             }
4792         } else {
4793             gen_op_mov_v_reg(s, ot, s->T0, rm);
4794         }
4795 
4796         switch(op) {
4797         case 0: /* test */
4798             val = insn_get(env, s, ot);
4799             tcg_gen_movi_tl(s->T1, val);
4800             gen_op_testl_T0_T1_cc(s);
4801             set_cc_op(s, CC_OP_LOGICB + ot);
4802             break;
4803         case 2: /* not */
4804             if (s->prefix & PREFIX_LOCK) {
4805                 if (mod == 3) {
4806                     goto illegal_op;
4807                 }
4808                 tcg_gen_movi_tl(s->T0, ~0);
4809                 tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
4810                                             s->mem_index, ot | MO_LE);
4811             } else {
4812                 tcg_gen_not_tl(s->T0, s->T0);
4813                 if (mod != 3) {
4814                     gen_op_st_v(s, ot, s->T0, s->A0);
4815                 } else {
4816                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4817                 }
4818             }
4819             break;
4820         case 3: /* neg */
4821             if (s->prefix & PREFIX_LOCK) {
4822                 TCGLabel *label1;
4823                 TCGv a0, t0, t1, t2;
4824 
4825                 if (mod == 3) {
4826                     goto illegal_op;
4827                 }
4828                 a0 = tcg_temp_local_new();
4829                 t0 = tcg_temp_local_new();
4830                 label1 = gen_new_label();
4831 
4832                 tcg_gen_mov_tl(a0, s->A0);
4833                 tcg_gen_mov_tl(t0, s->T0);
4834 
4835                 gen_set_label(label1);
4836                 t1 = tcg_temp_new();
4837                 t2 = tcg_temp_new();
4838                 tcg_gen_mov_tl(t2, t0);
4839                 tcg_gen_neg_tl(t1, t0);
4840                 tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4841                                           s->mem_index, ot | MO_LE);
4842                 tcg_temp_free(t1);
4843                 tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4844 
4845                 tcg_temp_free(t2);
4846                 tcg_temp_free(a0);
4847                 tcg_gen_mov_tl(s->T0, t0);
4848                 tcg_temp_free(t0);
4849             } else {
4850                 tcg_gen_neg_tl(s->T0, s->T0);
4851                 if (mod != 3) {
4852                     gen_op_st_v(s, ot, s->T0, s->A0);
4853                 } else {
4854                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4855                 }
4856             }
4857             gen_op_update_neg_cc(s);
4858             set_cc_op(s, CC_OP_SUBB + ot);
4859             break;
4860         case 4: /* mul */
4861             switch(ot) {
4862             case MO_8:
4863                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4864                 tcg_gen_ext8u_tl(s->T0, s->T0);
4865                 tcg_gen_ext8u_tl(s->T1, s->T1);
4866                 /* XXX: use 32 bit mul which could be faster */
4867                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4868                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4869                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4870                 tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
4871                 set_cc_op(s, CC_OP_MULB);
4872                 break;
4873             case MO_16:
4874                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4875                 tcg_gen_ext16u_tl(s->T0, s->T0);
4876                 tcg_gen_ext16u_tl(s->T1, s->T1);
4877                 /* XXX: use 32 bit mul which could be faster */
4878                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4879                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4880                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4881                 tcg_gen_shri_tl(s->T0, s->T0, 16);
4882                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4883                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
4884                 set_cc_op(s, CC_OP_MULW);
4885                 break;
4886             default:
4887             case MO_32:
4888                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4889                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4890                 tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4891                                   s->tmp2_i32, s->tmp3_i32);
4892                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4893                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4894                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4895                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4896                 set_cc_op(s, CC_OP_MULL);
4897                 break;
4898 #ifdef TARGET_X86_64
4899             case MO_64:
4900                 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4901                                   s->T0, cpu_regs[R_EAX]);
4902                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4903                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4904                 set_cc_op(s, CC_OP_MULQ);
4905                 break;
4906 #endif
4907             }
4908             break;
4909         case 5: /* imul */
4910             switch(ot) {
4911             case MO_8:
4912                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4913                 tcg_gen_ext8s_tl(s->T0, s->T0);
4914                 tcg_gen_ext8s_tl(s->T1, s->T1);
4915                 /* XXX: use 32 bit mul which could be faster */
4916                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4917                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4918                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4919                 tcg_gen_ext8s_tl(s->tmp0, s->T0);
4920                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4921                 set_cc_op(s, CC_OP_MULB);
4922                 break;
4923             case MO_16:
4924                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4925                 tcg_gen_ext16s_tl(s->T0, s->T0);
4926                 tcg_gen_ext16s_tl(s->T1, s->T1);
4927                 /* XXX: use 32 bit mul which could be faster */
4928                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4929                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4930                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4931                 tcg_gen_ext16s_tl(s->tmp0, s->T0);
4932                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4933                 tcg_gen_shri_tl(s->T0, s->T0, 16);
4934                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4935                 set_cc_op(s, CC_OP_MULW);
4936                 break;
4937             default:
4938             case MO_32:
4939                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4940                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4941                 tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
4942                                   s->tmp2_i32, s->tmp3_i32);
4943                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4944                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4945                 tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
4946                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4947                 tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
4948                 tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
4949                 set_cc_op(s, CC_OP_MULL);
4950                 break;
4951 #ifdef TARGET_X86_64
4952             case MO_64:
4953                 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4954                                   s->T0, cpu_regs[R_EAX]);
4955                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4956                 tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
4957                 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
4958                 set_cc_op(s, CC_OP_MULQ);
4959                 break;
4960 #endif
4961             }
4962             break;
4963         case 6: /* div */
4964             switch(ot) {
4965             case MO_8:
4966                 gen_helper_divb_AL(cpu_env, s->T0);
4967                 break;
4968             case MO_16:
4969                 gen_helper_divw_AX(cpu_env, s->T0);
4970                 break;
4971             default:
4972             case MO_32:
4973                 gen_helper_divl_EAX(cpu_env, s->T0);
4974                 break;
4975 #ifdef TARGET_X86_64
4976             case MO_64:
4977                 gen_helper_divq_EAX(cpu_env, s->T0);
4978                 break;
4979 #endif
4980             }
4981             break;
4982         case 7: /* idiv */
4983             switch(ot) {
4984             case MO_8:
4985                 gen_helper_idivb_AL(cpu_env, s->T0);
4986                 break;
4987             case MO_16:
4988                 gen_helper_idivw_AX(cpu_env, s->T0);
4989                 break;
4990             default:
4991             case MO_32:
4992                 gen_helper_idivl_EAX(cpu_env, s->T0);
4993                 break;
4994 #ifdef TARGET_X86_64
4995             case MO_64:
4996                 gen_helper_idivq_EAX(cpu_env, s->T0);
4997                 break;
4998 #endif
4999             }
5000             break;
5001         default:
5002             goto unknown_op;
5003         }
5004         break;
5005 
    case 0xfe: /* GRP4 */
    case 0xff: /* GRP5 */
        /*
         * Group 4/5: inc/dec/call/lcall/jmp/ljmp/push, selected by the
         * modrm reg field.  GRP4 (0xfe, byte operand) only defines
         * inc/dec (ops 0 and 1).
         */
        ot = mo_b_d(b, dflag);

        modrm = x86_ldub_code(env, s);
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        op = (modrm >> 3) & 7;
        if (op >= 2 && b == 0xfe) {
            goto unknown_op;
        }
        if (CODE64(s)) {
            if (op == 2 || op == 4) {
                /* operand size for jumps is 64 bit */
                ot = MO_64;
            } else if (op == 3 || op == 5) {
                /* far call/jmp: 16-bit with a data-size prefix,
                 * 64-bit with REX.W, otherwise 32-bit */
                ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
            } else if (op == 6) {
                /* default push size is 64 bit */
                ot = mo_pushpop(s, dflag);
            }
        }
        if (mod != 3) {
            gen_lea_modrm(env, s, modrm);
            /* lcall/ljmp (ops 3/5) load their two-part operand
             * themselves below; everything else loads here. */
            if (op >= 2 && op != 3 && op != 5)
                gen_op_ld_v(s, ot, s->T0, s->A0);
        } else {
            gen_op_mov_v_reg(s, ot, s->T0, rm);
        }

        switch(op) {
        case 0: /* inc Ev */
            if (mod != 3)
                opreg = OR_TMP0;
            else
                opreg = rm;
            gen_inc(s, ot, opreg, 1);
            break;
        case 1: /* dec Ev */
            if (mod != 3)
                opreg = OR_TMP0;
            else
                opreg = rm;
            gen_inc(s, ot, opreg, -1);
            break;
        case 2: /* call Ev */
            /* XXX: optimize if memory (no 'and' is necessary) */
            if (dflag == MO_16) {
                tcg_gen_ext16u_tl(s->T0, s->T0);
            }
            /* push the return address, then jump indirect */
            next_eip = s->pc - s->cs_base;
            tcg_gen_movi_tl(s->T1, next_eip);
            gen_push_v(s, s->T1);
            gen_op_jmp_v(s->T0);
            gen_bnd_jmp(s);
            gen_jr(s, s->T0);
            break;
        case 3: /* lcall Ev */
            /* load offset (size ot) then 16-bit selector */
            gen_op_ld_v(s, ot, s->T1, s->A0);
            gen_add_A0_im(s, 1 << ot);
            gen_op_ld_v(s, MO_16, s->T0, s->A0);
        do_lcall:
            /* shared goto target with the direct lcall opcode;
             * selector in T0, offset in T1 */
            if (s->pe && !s->vm86) {
                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
                                           tcg_const_i32(dflag - 1),
                                           tcg_const_tl(s->pc - s->cs_base));
            } else {
                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
                                      tcg_const_i32(dflag - 1),
                                      tcg_const_i32(s->pc - s->cs_base));
            }
            /* the helper updated env->eip; reload it and end the TB */
            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
            gen_jr(s, s->tmp4);
            break;
        case 4: /* jmp Ev */
            if (dflag == MO_16) {
                tcg_gen_ext16u_tl(s->T0, s->T0);
            }
            gen_op_jmp_v(s->T0);
            gen_bnd_jmp(s);
            gen_jr(s, s->T0);
            break;
        case 5: /* ljmp Ev */
            gen_op_ld_v(s, ot, s->T1, s->A0);
            gen_add_A0_im(s, 1 << ot);
            gen_op_ld_v(s, MO_16, s->T0, s->A0);
        do_ljmp:
            /* shared goto target with the direct ljmp opcode */
            if (s->pe && !s->vm86) {
                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
                                          tcg_const_tl(s->pc - s->cs_base));
            } else {
                /* real/vm86 mode: just set CS and EIP directly */
                gen_op_movl_seg_T0_vm(s, R_CS);
                gen_op_jmp_v(s->T1);
            }
            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
            gen_jr(s, s->tmp4);
            break;
        case 6: /* push Ev */
            gen_push_v(s, s->T0);
            break;
        default:
            goto unknown_op;
        }
        break;
5113 
    case 0x84: /* test Ev, Gv */
    case 0x85:
        /* TEST: compute flags from AND of the operands, discard result */
        ot = mo_b_d(b, dflag);

        modrm = x86_ldub_code(env, s);
        reg = ((modrm >> 3) & 7) | rex_r;

        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
        gen_op_mov_v_reg(s, ot, s->T1, reg);
        gen_op_testl_T0_T1_cc(s);
        set_cc_op(s, CC_OP_LOGICB + ot);
        break;

    case 0xa8: /* test eAX, Iv */
    case 0xa9:
        /* TEST against an immediate, accumulator form */
        ot = mo_b_d(b, dflag);
        val = insn_get(env, s, ot);

        gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
        tcg_gen_movi_tl(s->T1, val);
        gen_op_testl_T0_T1_cc(s);
        set_cc_op(s, CC_OP_LOGICB + ot);
        break;
5137 
    case 0x98: /* CWDE/CBW */
        /* Sign-extend the accumulator in place to the next wider size
         * (AL->AX, AX->EAX, EAX->RAX), chosen by the operand size. */
        switch (dflag) {
#ifdef TARGET_X86_64
        case MO_64:
            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
            tcg_gen_ext32s_tl(s->T0, s->T0);
            gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
            break;
#endif
        case MO_32:
            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
            tcg_gen_ext16s_tl(s->T0, s->T0);
            gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
            break;
        case MO_16:
            gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
            tcg_gen_ext8s_tl(s->T0, s->T0);
            gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
            break;
        default:
            tcg_abort();
        }
        break;
    case 0x99: /* CDQ/CWD */
        /* Replicate the accumulator's sign bit into DX/EDX/RDX by
         * arithmetic right shift of the (sign-extended) value. */
        switch (dflag) {
#ifdef TARGET_X86_64
        case MO_64:
            gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
            tcg_gen_sari_tl(s->T0, s->T0, 63);
            gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
            break;
#endif
        case MO_32:
            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
            tcg_gen_ext32s_tl(s->T0, s->T0);
            tcg_gen_sari_tl(s->T0, s->T0, 31);
            gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
            break;
        case MO_16:
            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
            tcg_gen_ext16s_tl(s->T0, s->T0);
            tcg_gen_sari_tl(s->T0, s->T0, 15);
            gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
            break;
        default:
            tcg_abort();
        }
        break;
    case 0x1af: /* imul Gv, Ev */
    case 0x69: /* imul Gv, Ev, I */
    case 0x6b:
        /*
         * Two/three-operand signed multiply.  The second source is an
         * immediate (full-size for 0x69, sign-extended byte for 0x6b)
         * or the destination register itself (0x1af).  For the flags,
         * cc_src is set to the difference between the true high part of
         * the product and the sign-extension of the low part: nonzero
         * means the result overflowed the destination (drives CF/OF).
         */
        ot = dflag;
        modrm = x86_ldub_code(env, s);
        reg = ((modrm >> 3) & 7) | rex_r;
        /* tell the modrm decoder how many immediate bytes follow, so
         * RIP-relative addressing is computed correctly */
        if (b == 0x69)
            s->rip_offset = insn_const_size(ot);
        else if (b == 0x6b)
            s->rip_offset = 1;
        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
        if (b == 0x69) {
            val = insn_get(env, s, ot);
            tcg_gen_movi_tl(s->T1, val);
        } else if (b == 0x6b) {
            val = (int8_t)insn_get(env, s, MO_8);
            tcg_gen_movi_tl(s->T1, val);
        } else {
            gen_op_mov_v_reg(s, ot, s->T1, reg);
        }
        switch (ot) {
#ifdef TARGET_X86_64
        case MO_64:
            tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
            tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
            break;
#endif
        case MO_32:
            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
            tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
            tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
                              s->tmp2_i32, s->tmp3_i32);
            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
            tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
            tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
            tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
            break;
        default:
            /* 16-bit: multiply in the full-width temp, then compare the
             * result against its own 16-bit sign-extension for overflow */
            tcg_gen_ext16s_tl(s->T0, s->T0);
            tcg_gen_ext16s_tl(s->T1, s->T1);
            /* XXX: use 32 bit mul which could be faster */
            tcg_gen_mul_tl(s->T0, s->T0, s->T1);
            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
            tcg_gen_ext16s_tl(s->tmp0, s->T0);
            tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
            gen_op_mov_reg_v(s, ot, reg, s->T0);
            break;
        }
        set_cc_op(s, CC_OP_MULB + ot);
        break;
    case 0x1c0:
    case 0x1c1: /* xadd Ev, Gv */
        /*
         * Exchange-and-add: dest += src, src gets the old dest.
         * With a LOCK prefix on the memory form this is done with a
         * single atomic fetch-add.
         */
        ot = mo_b_d(b, dflag);
        modrm = x86_ldub_code(env, s);
        reg = ((modrm >> 3) & 7) | rex_r;
        mod = (modrm >> 6) & 3;
        gen_op_mov_v_reg(s, ot, s->T0, reg);
        if (mod == 3) {
            rm = (modrm & 7) | REX_B(s);
            gen_op_mov_v_reg(s, ot, s->T1, rm);
            tcg_gen_add_tl(s->T0, s->T0, s->T1);
            gen_op_mov_reg_v(s, ot, reg, s->T1);
            gen_op_mov_reg_v(s, ot, rm, s->T0);
        } else {
            gen_lea_modrm(env, s, modrm);
            if (s->prefix & PREFIX_LOCK) {
                /* T1 receives the old memory value */
                tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
                                            s->mem_index, ot | MO_LE);
                tcg_gen_add_tl(s->T0, s->T0, s->T1);
            } else {
                gen_op_ld_v(s, ot, s->T1, s->A0);
                tcg_gen_add_tl(s->T0, s->T0, s->T1);
                gen_op_st_v(s, ot, s->T0, s->A0);
            }
            gen_op_mov_reg_v(s, ot, reg, s->T1);
        }
        /* flags as for ADD: cc_src = old value, cc_dst = sum */
        gen_op_update2_cc(s);
        set_cc_op(s, CC_OP_ADDB + ot);
        break;
    case 0x1b0:
    case 0x1b1: /* cmpxchg Ev, Gv */
        /*
         * Compare-and-exchange: if dest == accumulator, dest = src,
         * else accumulator = dest.  Flags are set as for CMP of
         * accumulator with the old dest value.
         */
        {
            TCGv oldv, newv, cmpv;

            ot = mo_b_d(b, dflag);
            modrm = x86_ldub_code(env, s);
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;
            oldv = tcg_temp_new();
            newv = tcg_temp_new();
            cmpv = tcg_temp_new();
            gen_op_mov_v_reg(s, ot, newv, reg);
            tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);

            if (s->prefix & PREFIX_LOCK) {
                /* LOCK is only valid with a memory destination */
                if (mod == 3) {
                    goto illegal_op;
                }
                gen_lea_modrm(env, s, modrm);
                tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
                                          s->mem_index, ot | MO_LE);
                gen_op_mov_reg_v(s, ot, R_EAX, oldv);
            } else {
                if (mod == 3) {
                    rm = (modrm & 7) | REX_B(s);
                    gen_op_mov_v_reg(s, ot, oldv, rm);
                } else {
                    gen_lea_modrm(env, s, modrm);
                    gen_op_ld_v(s, ot, oldv, s->A0);
                    rm = 0; /* avoid warning */
                }
                /* compare zero-extended values so sub-word widths match */
                gen_extu(ot, oldv);
                gen_extu(ot, cmpv);
                /* store value = (old == cmp ? new : old);  */
                tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
                if (mod == 3) {
                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
                    gen_op_mov_reg_v(s, ot, rm, newv);
                } else {
                    /* Perform an unconditional store cycle like physical cpu;
                       must be before changing accumulator to ensure
                       idempotency if the store faults and the instruction
                       is restarted */
                    gen_op_st_v(s, ot, newv, s->A0);
                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
                }
            }
            /* CMP-style flags: cc_srcT/cc_src/cc_dst as for cmpv - oldv */
            tcg_gen_mov_tl(cpu_cc_src, oldv);
            tcg_gen_mov_tl(s->cc_srcT, cmpv);
            tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
            set_cc_op(s, CC_OP_SUBB + ot);
            tcg_temp_free(oldv);
            tcg_temp_free(newv);
            tcg_temp_free(cmpv);
        }
        break;
    case 0x1c7: /* cmpxchg8b */
        /* Group 9: cmpxchg8b/cmpxchg16b (memory forms) and
         * rdrand/rdseed (register forms), selected by modrm reg. */
        modrm = x86_ldub_code(env, s);
        mod = (modrm >> 6) & 3;
        switch ((modrm >> 3) & 7) {
        case 1: /* CMPXCHG8, CMPXCHG16 */
            if (mod == 3) {
                goto illegal_op;
            }
#ifdef TARGET_X86_64
            if (dflag == MO_64) {
                /* REX.W form is cmpxchg16b; requires the CX16 CPUID bit */
                if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
                    goto illegal_op;
                }
                gen_lea_modrm(env, s, modrm);
                /* the atomic helper is only needed when other vCPUs can
                 * run in parallel with this one */
                if ((s->prefix & PREFIX_LOCK) &&
                    (tb_cflags(s->base.tb) & CF_PARALLEL)) {
                    gen_helper_cmpxchg16b(cpu_env, s->A0);
                } else {
                    gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
                }
                set_cc_op(s, CC_OP_EFLAGS);
                break;
            }
#endif
            if (!(s->cpuid_features & CPUID_CX8)) {
                goto illegal_op;
            }
            gen_lea_modrm(env, s, modrm);
            if ((s->prefix & PREFIX_LOCK) &&
                (tb_cflags(s->base.tb) & CF_PARALLEL)) {
                gen_helper_cmpxchg8b(cpu_env, s->A0);
            } else {
                gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
            }
            set_cc_op(s, CC_OP_EFLAGS);
            break;

        case 7: /* RDSEED */
        case 6: /* RDRAND */
            /* NOTE: RDSEED shares the RDRAND implementation and is
             * gated on the RDRAND CPUID bit here. */
            if (mod != 3 ||
                (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
                !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) {
                goto illegal_op;
            }
            /* bracket the helper with icount start/end since it performs
             * I/O-like (non-deterministic) work */
            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                gen_io_start();
            }
            gen_helper_rdrand(s->T0, cpu_env);
            rm = (modrm & 7) | REX_B(s);
            gen_op_mov_reg_v(s, dflag, rm, s->T0);
            set_cc_op(s, CC_OP_EFLAGS);
            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                gen_jmp(s, s->pc - s->cs_base);
            }
            break;

        default:
            goto illegal_op;
        }
        break;
5385 
        /**************************/
        /* push/pop */
    case 0x50 ... 0x57: /* push */
        gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
        gen_push_v(s, s->T0);
        break;
    case 0x58 ... 0x5f: /* pop */
        ot = gen_pop_T0(s);
        /* NOTE: order is important for pop %sp */
        gen_pop_update(s, ot);
        gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
        break;
    case 0x60: /* pusha */
        /* pusha/popa do not exist in 64-bit mode */
        if (CODE64(s))
            goto illegal_op;
        gen_pusha(s);
        break;
    case 0x61: /* popa */
        if (CODE64(s))
            goto illegal_op;
        gen_popa(s);
        break;
    case 0x68: /* push Iv */
    case 0x6a:
        /* full-size immediate for 0x68, sign-extended byte for 0x6a */
        ot = mo_pushpop(s, dflag);
        if (b == 0x68)
            val = insn_get(env, s, ot);
        else
            val = (int8_t)insn_get(env, s, MO_8);
        tcg_gen_movi_tl(s->T0, val);
        gen_push_v(s, s->T0);
        break;
    case 0x8f: /* pop Ev */
        modrm = x86_ldub_code(env, s);
        mod = (modrm >> 6) & 3;
        ot = gen_pop_T0(s);
        if (mod == 3) {
            /* NOTE: order is important for pop %sp */
            gen_pop_update(s, ot);
            rm = (modrm & 7) | REX_B(s);
            gen_op_mov_reg_v(s, ot, rm, s->T0);
        } else {
            /* NOTE: order is important too for MMU exceptions */
            /* popl_esp_hack makes the effective-address computation use
             * the pre-pop ESP value for ESP-relative destinations */
            s->popl_esp_hack = 1 << ot;
            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
            s->popl_esp_hack = 0;
            gen_pop_update(s, ot);
        }
        break;
    case 0xc8: /* enter */
        {
            int level;
            /* 16-bit frame size followed by 8-bit nesting level */
            val = x86_lduw_code(env, s);
            level = x86_ldub_code(env, s);
            gen_enter(s, val, level);
        }
        break;
    case 0xc9: /* leave */
        gen_leave(s);
        break;
    case 0x06: /* push es */
    case 0x0e: /* push cs */
    case 0x16: /* push ss */
    case 0x1e: /* push ds */
        /* legacy segment pushes are invalid in 64-bit mode */
        if (CODE64(s))
            goto illegal_op;
        gen_op_movl_T0_seg(s, b >> 3);
        gen_push_v(s, s->T0);
        break;
    case 0x1a0: /* push fs */
    case 0x1a8: /* push gs */
        gen_op_movl_T0_seg(s, (b >> 3) & 7);
        gen_push_v(s, s->T0);
        break;
    case 0x07: /* pop es */
    case 0x17: /* pop ss */
    case 0x1f: /* pop ds */
        if (CODE64(s))
            goto illegal_op;
        reg = b >> 3;
        ot = gen_pop_T0(s);
        gen_movl_seg_T0(s, reg);
        gen_pop_update(s, ot);
        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
        if (s->base.is_jmp) {
            gen_jmp_im(s, s->pc - s->cs_base);
            if (reg == R_SS) {
                /* pop ss inhibits interrupts and traps for one insn */
                s->tf = 0;
                gen_eob_inhibit_irq(s, true);
            } else {
                gen_eob(s);
            }
        }
        break;
    case 0x1a1: /* pop fs */
    case 0x1a9: /* pop gs */
        ot = gen_pop_T0(s);
        gen_movl_seg_T0(s, (b >> 3) & 7);
        gen_pop_update(s, ot);
        if (s->base.is_jmp) {
            gen_jmp_im(s, s->pc - s->cs_base);
            gen_eob(s);
        }
        break;
5490 
        /**************************/
        /* mov */
    case 0x88:
    case 0x89: /* mov Gv, Ev */
        ot = mo_b_d(b, dflag);
        modrm = x86_ldub_code(env, s);
        reg = ((modrm >> 3) & 7) | rex_r;

        /* generate a generic store */
        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
        break;
    case 0xc6:
    case 0xc7: /* mov Ev, Iv */
        ot = mo_b_d(b, dflag);
        modrm = x86_ldub_code(env, s);
        mod = (modrm >> 6) & 3;
        if (mod != 3) {
            /* account for the trailing immediate when computing
             * RIP-relative addresses */
            s->rip_offset = insn_const_size(ot);
            gen_lea_modrm(env, s, modrm);
        }
        val = insn_get(env, s, ot);
        tcg_gen_movi_tl(s->T0, val);
        if (mod != 3) {
            gen_op_st_v(s, ot, s->T0, s->A0);
        } else {
            gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
        }
        break;
    case 0x8a:
    case 0x8b: /* mov Ev, Gv */
        ot = mo_b_d(b, dflag);
        modrm = x86_ldub_code(env, s);
        reg = ((modrm >> 3) & 7) | rex_r;

        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
        gen_op_mov_reg_v(s, ot, reg, s->T0);
        break;
    case 0x8e: /* mov seg, Gv */
        modrm = x86_ldub_code(env, s);
        reg = (modrm >> 3) & 7;
        /* moving into CS (or a nonexistent segment) is invalid */
        if (reg >= 6 || reg == R_CS)
            goto illegal_op;
        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
        gen_movl_seg_T0(s, reg);
        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
        if (s->base.is_jmp) {
            gen_jmp_im(s, s->pc - s->cs_base);
            if (reg == R_SS) {
                /* mov ss inhibits interrupts and traps for one insn */
                s->tf = 0;
                gen_eob_inhibit_irq(s, true);
            } else {
                gen_eob(s);
            }
        }
        break;
    case 0x8c: /* mov Gv, seg */
        modrm = x86_ldub_code(env, s);
        reg = (modrm >> 3) & 7;
        mod = (modrm >> 6) & 3;
        if (reg >= 6)
            goto illegal_op;
        gen_op_movl_T0_seg(s, reg);
        /* register destination uses the full operand size; memory
         * destination always stores 16 bits */
        ot = mod == 3 ? dflag : MO_16;
        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
        break;
5556 
    case 0x1b6: /* movzbS Gv, Eb */
    case 0x1b7: /* movzwS Gv, Eb */
    case 0x1be: /* movsbS Gv, Eb */
    case 0x1bf: /* movswS Gv, Eb */
        /* movzx/movsx: load a byte/word source, zero- or sign-extend
         * it, and store into a full-size destination register. */
        {
            MemOp d_ot;
            MemOp s_ot;

            /* d_ot is the size of destination */
            d_ot = dflag;
            /* ot is the size of source */
            ot = (b & 1) + MO_8;
            /* s_ot is the sign+size of source */
            s_ot = b & 8 ? MO_SIGN | ot : ot;

            modrm = x86_ldub_code(env, s);
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;
            rm = (modrm & 7) | REX_B(s);

            if (mod == 3) {
                if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
                    /* AH/CH/DH/BH: sign-extract bits 8..15 of the
                     * underlying dword register */
                    tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
                } else {
                    gen_op_mov_v_reg(s, ot, s->T0, rm);
                    switch (s_ot) {
                    case MO_UB:
                        tcg_gen_ext8u_tl(s->T0, s->T0);
                        break;
                    case MO_SB:
                        tcg_gen_ext8s_tl(s->T0, s->T0);
                        break;
                    case MO_UW:
                        tcg_gen_ext16u_tl(s->T0, s->T0);
                        break;
                    default:
                    case MO_SW:
                        tcg_gen_ext16s_tl(s->T0, s->T0);
                        break;
                    }
                }
                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
            } else {
                /* memory source: the load itself applies the extension */
                gen_lea_modrm(env, s, modrm);
                gen_op_ld_v(s, s_ot, s->T0, s->A0);
                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
            }
        }
        break;
5606 
    case 0x8d: /* lea */
        /* compute the effective address only -- no memory access, and
         * no segment base is applied (hence the -1 arguments below) */
        modrm = x86_ldub_code(env, s);
        mod = (modrm >> 6) & 3;
        /* a register operand has no address to take */
        if (mod == 3)
            goto illegal_op;
        reg = ((modrm >> 3) & 7) | rex_r;
        {
            AddressParts a = gen_lea_modrm_0(env, s, modrm);
            TCGv ea = gen_lea_modrm_1(s, a);
            gen_lea_v_seg(s, s->aflag, ea, -1, -1);
            gen_op_mov_reg_v(s, dflag, reg, s->A0);
        }
        break;
5620 
    case 0xa0: /* mov EAX, Ov */
    case 0xa1:
    case 0xa2: /* mov Ov, EAX */
    case 0xa3:
        /* accumulator <-> absolute-offset memory ("moffs" forms); the
         * direction is encoded in bit 1 of the opcode */
        {
            target_ulong offset_addr;

            ot = mo_b_d(b, dflag);
            switch (s->aflag) {
#ifdef TARGET_X86_64
            case MO_64:
                /* 64-bit address size carries a full 8-byte offset */
                offset_addr = x86_ldq_code(env, s);
                break;
#endif
            default:
                offset_addr = insn_get(env, s, s->aflag);
                break;
            }
            tcg_gen_movi_tl(s->A0, offset_addr);
            gen_add_A0_ds_seg(s);
            if ((b & 2) == 0) {
                gen_op_ld_v(s, ot, s->T0, s->A0);
                gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
            } else {
                gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
                gen_op_st_v(s, ot, s->T0, s->A0);
            }
        }
        break;
    case 0xd7: /* xlat */
        /* AL = mem[seg:(E/R)BX + zero-extended AL] */
        tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
        tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
        tcg_gen_add_tl(s->A0, s->A0, s->T0)_
        gen_extu(s->aflag, s->A0);
        gen_add_A0_ds_seg(s);
        gen_op_ld_v(s, MO_8, s->T0, s->A0);
        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
        break;
    case 0xb0 ... 0xb7: /* mov R, Ib */
        val = insn_get(env, s, MO_8);
        tcg_gen_movi_tl(s->T0, val);
        gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
        break;
    case 0xb8 ... 0xbf: /* mov R, Iv */
#ifdef TARGET_X86_64
        if (dflag == MO_64) {
            uint64_t tmp;
            /* 64 bit case */
            tmp = x86_ldq_code(env, s);
            reg = (b & 7) | REX_B(s);
            tcg_gen_movi_tl(s->T0, tmp);
            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
        } else
#endif
        {
            ot = dflag;
            val = insn_get(env, s, ot);
            reg = (b & 7) | REX_B(s);
            tcg_gen_movi_tl(s->T0, val);
            gen_op_mov_reg_v(s, ot, reg, s->T0);
        }
        break;
5683 
    case 0x91 ... 0x97: /* xchg R, EAX */
    do_xchg_reg_eax:
        ot = dflag;
        reg = (b & 7) | REX_B(s);
        rm = R_EAX;
        goto do_xchg_reg;
    case 0x86:
    case 0x87: /* xchg Ev, Gv */
        ot = mo_b_d(b, dflag);
        modrm = x86_ldub_code(env, s);
        reg = ((modrm >> 3) & 7) | rex_r;
        mod = (modrm >> 6) & 3;
        if (mod == 3) {
            rm = (modrm & 7) | REX_B(s);
        do_xchg_reg:
            /* register-register swap through two temporaries */
            gen_op_mov_v_reg(s, ot, s->T0, reg);
            gen_op_mov_v_reg(s, ot, s->T1, rm);
            gen_op_mov_reg_v(s, ot, rm, s->T0);
            gen_op_mov_reg_v(s, ot, reg, s->T1);
        } else {
            gen_lea_modrm(env, s, modrm);
            gen_op_mov_v_reg(s, ot, s->T0, reg);
            /* for xchg, lock is implicit */
            tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
                                   s->mem_index, ot | MO_LE);
            gen_op_mov_reg_v(s, ot, reg, s->T1);
        }
        break;
    case 0xc4: /* les Gv */
        /* In CODE64 this is VEX3; see above.  */
        op = R_ES;
        goto do_lxx;
    case 0xc5: /* lds Gv */
        /* In CODE64 this is VEX2; see above.  */
        op = R_DS;
        goto do_lxx;
    case 0x1b2: /* lss Gv */
        op = R_SS;
        goto do_lxx;
    case 0x1b4: /* lfs Gv */
        op = R_FS;
        goto do_lxx;
    case 0x1b5: /* lgs Gv */
        op = R_GS;
    do_lxx:
        /* lXs: load a far pointer (offset + 16-bit selector) from
         * memory into a segment register and a general register */
        ot = dflag != MO_16 ? MO_32 : MO_16;
        modrm = x86_ldub_code(env, s);
        reg = ((modrm >> 3) & 7) | rex_r;
        mod = (modrm >> 6) & 3;
        if (mod == 3)
            goto illegal_op;
        gen_lea_modrm(env, s, modrm);
        gen_op_ld_v(s, ot, s->T1, s->A0);
        gen_add_A0_im(s, 1 << ot);
        /* load the segment first to handle exceptions properly */
        gen_op_ld_v(s, MO_16, s->T0, s->A0);
        gen_movl_seg_T0(s, op);
        /* then put the data */
        gen_op_mov_reg_v(s, ot, reg, s->T1);
        if (s->base.is_jmp) {
            gen_jmp_im(s, s->pc - s->cs_base);
            gen_eob(s);
        }
        break;
5748 
        /************************/
        /* shifts */
    case 0xc0:
    case 0xc1:
        /* shift Ev,Ib */
        shift = 2;
    grp2:
        /* Group 2 shifts/rotates; 'shift' selects the count source:
         * 0 = CL, 1 = constant 1, 2 = immediate byte. */
        {
            ot = mo_b_d(b, dflag);
            modrm = x86_ldub_code(env, s);
            mod = (modrm >> 6) & 3;
            op = (modrm >> 3) & 7;

            if (mod != 3) {
                if (shift == 2) {
                    /* immediate byte follows the modrm bytes; needed for
                     * correct RIP-relative address computation */
                    s->rip_offset = 1;
                }
                gen_lea_modrm(env, s, modrm);
                opreg = OR_TMP0;
            } else {
                opreg = (modrm & 7) | REX_B(s);
            }

            /* simpler op */
            if (shift == 0) {
                gen_shift(s, op, ot, opreg, OR_ECX);
            } else {
                if (shift == 2) {
                    shift = x86_ldub_code(env, s);
                }
                gen_shifti(s, op, ot, opreg, shift);
            }
        }
        break;
    case 0xd0:
    case 0xd1:
        /* shift Ev,1 */
        shift = 1;
        goto grp2;
    case 0xd2:
    case 0xd3:
        /* shift Ev,cl */
        shift = 0;
        goto grp2;

    case 0x1a4: /* shld imm */
        op = 0;
        shift = 1;
        goto do_shiftd;
    case 0x1a5: /* shld cl */
        op = 0;
        shift = 0;
        goto do_shiftd;
    case 0x1ac: /* shrd imm */
        op = 1;
        shift = 1;
        goto do_shiftd;
    case 0x1ad: /* shrd cl */
        op = 1;
        shift = 0;
    do_shiftd:
        /* double-precision shifts; op selects shld (0) vs shrd (1),
         * shift selects immediate (1) vs CL (0) count */
        ot = dflag;
        modrm = x86_ldub_code(env, s);
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        reg = ((modrm >> 3) & 7) | rex_r;
        if (mod != 3) {
            gen_lea_modrm(env, s, modrm);
            opreg = OR_TMP0;
        } else {
            opreg = rm;
        }
        gen_op_mov_v_reg(s, ot, s->T1, reg);

        if (shift) {
            TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
            gen_shiftd_rm_T1(s, ot, opreg, op, imm);
            tcg_temp_free(imm);
        } else {
            gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
        }
        break;
5831 
5832         /************************/
5833         /* floats */
5834     case 0xd8 ... 0xdf:
5835         if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5836             /* if CR0.EM or CR0.TS are set, generate an FPU exception */
5837             /* XXX: what to do if illegal op ? */
5838             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5839             break;
5840         }
5841         modrm = x86_ldub_code(env, s);
5842         mod = (modrm >> 6) & 3;
5843         rm = modrm & 7;
5844         op = ((b & 7) << 3) | ((modrm >> 3) & 7);
5845         if (mod != 3) {
5846             /* memory op */
5847             gen_lea_modrm(env, s, modrm);
5848             switch(op) {
5849             case 0x00 ... 0x07: /* fxxxs */
5850             case 0x10 ... 0x17: /* fixxxl */
5851             case 0x20 ... 0x27: /* fxxxl */
5852             case 0x30 ... 0x37: /* fixxx */
5853                 {
5854                     int op1;
5855                     op1 = op & 7;
5856 
5857                     switch(op >> 4) {
5858                     case 0:
5859                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5860                                             s->mem_index, MO_LEUL);
5861                         gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
5862                         break;
5863                     case 1:
5864                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5865                                             s->mem_index, MO_LEUL);
5866                         gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5867                         break;
5868                     case 2:
5869                         tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5870                                             s->mem_index, MO_LEQ);
5871                         gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
5872                         break;
5873                     case 3:
5874                     default:
5875                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5876                                             s->mem_index, MO_LESW);
5877                         gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5878                         break;
5879                     }
5880 
5881                     gen_helper_fp_arith_ST0_FT0(op1);
5882                     if (op1 == 3) {
5883                         /* fcomp needs pop */
5884                         gen_helper_fpop(cpu_env);
5885                     }
5886                 }
5887                 break;
5888             case 0x08: /* flds */
5889             case 0x0a: /* fsts */
5890             case 0x0b: /* fstps */
5891             case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5892             case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5893             case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5894                 switch(op & 7) {
5895                 case 0:
5896                     switch(op >> 4) {
5897                     case 0:
5898                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5899                                             s->mem_index, MO_LEUL);
5900                         gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
5901                         break;
5902                     case 1:
5903                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5904                                             s->mem_index, MO_LEUL);
5905                         gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5906                         break;
5907                     case 2:
5908                         tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5909                                             s->mem_index, MO_LEQ);
5910                         gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
5911                         break;
5912                     case 3:
5913                     default:
5914                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5915                                             s->mem_index, MO_LESW);
5916                         gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5917                         break;
5918                     }
5919                     break;
5920                 case 1:
5921                     /* XXX: the corresponding CPUID bit must be tested ! */
5922                     switch(op >> 4) {
5923                     case 1:
5924                         gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
5925                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5926                                             s->mem_index, MO_LEUL);
5927                         break;
5928                     case 2:
5929                         gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
5930                         tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
5931                                             s->mem_index, MO_LEQ);
5932                         break;
5933                     case 3:
5934                     default:
5935                         gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
5936                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5937                                             s->mem_index, MO_LEUW);
5938                         break;
5939                     }
5940                     gen_helper_fpop(cpu_env);
5941                     break;
5942                 default:
5943                     switch(op >> 4) {
5944                     case 0:
5945                         gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
5946                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5947                                             s->mem_index, MO_LEUL);
5948                         break;
5949                     case 1:
5950                         gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
5951                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5952                                             s->mem_index, MO_LEUL);
5953                         break;
5954                     case 2:
5955                         gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
5956                         tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
5957                                             s->mem_index, MO_LEQ);
5958                         break;
5959                     case 3:
5960                     default:
5961                         gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
5962                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5963                                             s->mem_index, MO_LEUW);
5964                         break;
5965                     }
5966                     if ((op & 7) == 3)
5967                         gen_helper_fpop(cpu_env);
5968                     break;
5969                 }
5970                 break;
            case 0x0c: /* fldenv mem */
                /* dflag - 1 tells the helper whether the 16-bit or the
                   32-bit env image layout is in memory */
                gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                break;
            case 0x0d: /* fldcw mem */
                tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                    s->mem_index, MO_LEUW);
                gen_helper_fldcw(cpu_env, s->tmp2_i32);
                break;
            case 0x0e: /* fnstenv mem */
                gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                break;
            case 0x0f: /* fnstcw mem */
                gen_helper_fnstcw(s->tmp2_i32, cpu_env);
                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                    s->mem_index, MO_LEUW);
                break;
            case 0x1d: /* fldt mem */
                /* 80-bit extended-precision load; helper accesses memory
                   itself via the linear address in A0 */
                gen_helper_fldt_ST0(cpu_env, s->A0);
                break;
            case 0x1f: /* fstpt mem */
                gen_helper_fstt_ST0(cpu_env, s->A0);
                gen_helper_fpop(cpu_env);
                break;
            case 0x2c: /* frstor mem */
                gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                break;
            case 0x2e: /* fnsave mem */
                gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                break;
            case 0x2f: /* fnstsw mem */
                gen_helper_fnstsw(s->tmp2_i32, cpu_env);
                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                    s->mem_index, MO_LEUW);
                break;
            case 0x3c: /* fbld */
                /* packed-BCD load; helper reads memory itself */
                gen_helper_fbld_ST0(cpu_env, s->A0);
                break;
            case 0x3e: /* fbstp */
                gen_helper_fbst_ST0(cpu_env, s->A0);
                gen_helper_fpop(cpu_env);
                break;
            case 0x3d: /* fildll */
                tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
                gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
                break;
            case 0x3f: /* fistpll */
                gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
                tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
                gen_helper_fpop(cpu_env);
                break;
            default:
                goto unknown_op;
            }
        } else {
            /* register float ops */
            opreg = rm;

            switch(op) {
            case 0x08: /* fld sti */
                gen_helper_fpush(cpu_env);
                /* fpush moves the stack top first, so the source index
                   is shifted by one relative to opreg */
                gen_helper_fmov_ST0_STN(cpu_env,
                                        tcg_const_i32((opreg + 1) & 7));
                break;
            case 0x09: /* fxchg sti */
            case 0x29: /* fxchg4 sti, undocumented op */
            case 0x39: /* fxchg7 sti, undocumented op */
                gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
                break;
            case 0x0a: /* grp d9/2 */
                switch(rm) {
                case 0: /* fnop */
                    /* check exceptions (FreeBSD FPU probe) */
                    gen_helper_fwait(cpu_env);
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x0c: /* grp d9/4 */
                switch(rm) {
                case 0: /* fchs */
                    gen_helper_fchs_ST0(cpu_env);
                    break;
                case 1: /* fabs */
                    gen_helper_fabs_ST0(cpu_env);
                    break;
                case 4: /* ftst */
                    /* compare ST0 against +0.0 */
                    gen_helper_fldz_FT0(cpu_env);
                    gen_helper_fcom_ST0_FT0(cpu_env);
                    break;
                case 5: /* fxam */
                    gen_helper_fxam_ST0(cpu_env);
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x0d: /* grp d9/5 */
                {
                    /* constant loads: push then load the constant into ST0 */
                    switch(rm) {
                    case 0:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fld1_ST0(cpu_env);
                        break;
                    case 1:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fldl2t_ST0(cpu_env);
                        break;
                    case 2:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fldl2e_ST0(cpu_env);
                        break;
                    case 3:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fldpi_ST0(cpu_env);
                        break;
                    case 4:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fldlg2_ST0(cpu_env);
                        break;
                    case 5:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fldln2_ST0(cpu_env);
                        break;
                    case 6:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fldz_ST0(cpu_env);
                        break;
                    default:
                        goto unknown_op;
                    }
                }
                break;
            case 0x0e: /* grp d9/6 */
                switch(rm) {
                case 0: /* f2xm1 */
                    gen_helper_f2xm1(cpu_env);
                    break;
                case 1: /* fyl2x */
                    gen_helper_fyl2x(cpu_env);
                    break;
                case 2: /* fptan */
                    gen_helper_fptan(cpu_env);
                    break;
                case 3: /* fpatan */
                    gen_helper_fpatan(cpu_env);
                    break;
                case 4: /* fxtract */
                    gen_helper_fxtract(cpu_env);
                    break;
                case 5: /* fprem1 */
                    gen_helper_fprem1(cpu_env);
                    break;
                case 6: /* fdecstp */
                    gen_helper_fdecstp(cpu_env);
                    break;
                default:
                case 7: /* fincstp */
                    gen_helper_fincstp(cpu_env);
                    break;
                }
                break;
            case 0x0f: /* grp d9/7 */
                switch(rm) {
                case 0: /* fprem */
                    gen_helper_fprem(cpu_env);
                    break;
                case 1: /* fyl2xp1 */
                    gen_helper_fyl2xp1(cpu_env);
                    break;
                case 2: /* fsqrt */
                    gen_helper_fsqrt(cpu_env);
                    break;
                case 3: /* fsincos */
                    gen_helper_fsincos(cpu_env);
                    break;
                case 5: /* fscale */
                    gen_helper_fscale(cpu_env);
                    break;
                case 4: /* frndint */
                    gen_helper_frndint(cpu_env);
                    break;
                case 6: /* fsin */
                    gen_helper_fsin(cpu_env);
                    break;
                default:
                case 7: /* fcos */
                    gen_helper_fcos(cpu_env);
                    break;
                }
                break;
            case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
            case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
            case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
                {
                    int op1;

                    op1 = op & 7;
                    if (op >= 0x20) {
                        /* destination is ST(i); 0x30.. is the popping
                           (fxxxp) form */
                        gen_helper_fp_arith_STN_ST0(op1, opreg);
                        if (op >= 0x30)
                            gen_helper_fpop(cpu_env);
                    } else {
                        /* destination is ST0, source is ST(i) */
                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                        gen_helper_fp_arith_ST0_FT0(op1);
                    }
                }
                break;
            case 0x02: /* fcom */
            case 0x22: /* fcom2, undocumented op */
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fcom_ST0_FT0(cpu_env);
                break;
            case 0x03: /* fcomp */
            case 0x23: /* fcomp3, undocumented op */
            case 0x32: /* fcomp5, undocumented op */
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fcom_ST0_FT0(cpu_env);
                gen_helper_fpop(cpu_env);
                break;
            case 0x15: /* da/5 */
                switch(rm) {
                case 1: /* fucompp */
                    /* compare ST0 with ST1, then pop both */
                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
                    gen_helper_fucom_ST0_FT0(cpu_env);
                    gen_helper_fpop(cpu_env);
                    gen_helper_fpop(cpu_env);
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x1c:
                switch(rm) {
                case 0: /* feni (287 only, just do nop here) */
                    break;
                case 1: /* fdisi (287 only, just do nop here) */
                    break;
                case 2: /* fclex */
                    gen_helper_fclex(cpu_env);
                    break;
                case 3: /* fninit */
                    gen_helper_fninit(cpu_env);
                    break;
                case 4: /* fsetpm (287 only, just do nop here) */
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x1d: /* fucomi */
                /* fcomi/fucomi write EFLAGS and were introduced together
                   with CMOV, hence the CPUID check */
                if (!(s->cpuid_features & CPUID_CMOV)) {
                    goto illegal_op;
                }
                gen_update_cc_op(s);
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fucomi_ST0_FT0(cpu_env);
                set_cc_op(s, CC_OP_EFLAGS);
                break;
            case 0x1e: /* fcomi */
                if (!(s->cpuid_features & CPUID_CMOV)) {
                    goto illegal_op;
                }
                gen_update_cc_op(s);
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fcomi_ST0_FT0(cpu_env);
                set_cc_op(s, CC_OP_EFLAGS);
                break;
            case 0x28: /* ffree sti */
                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
                break;
            case 0x2a: /* fst sti */
                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
                break;
            case 0x2b: /* fstp sti */
            case 0x0b: /* fstp1 sti, undocumented op */
            case 0x3a: /* fstp8 sti, undocumented op */
            case 0x3b: /* fstp9 sti, undocumented op */
                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
                gen_helper_fpop(cpu_env);
                break;
            case 0x2c: /* fucom st(i) */
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fucom_ST0_FT0(cpu_env);
                break;
            case 0x2d: /* fucomp st(i) */
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fucom_ST0_FT0(cpu_env);
                gen_helper_fpop(cpu_env);
                break;
            case 0x33: /* de/3 */
                switch(rm) {
                case 1: /* fcompp */
                    /* compare ST0 with ST1, then pop both */
                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
                    gen_helper_fcom_ST0_FT0(cpu_env);
                    gen_helper_fpop(cpu_env);
                    gen_helper_fpop(cpu_env);
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x38: /* ffreep sti, undocumented op */
                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fpop(cpu_env);
                break;
            case 0x3c: /* df/4 */
                switch(rm) {
                case 0: /* fnstsw ax */
                    gen_helper_fnstsw(s->tmp2_i32, cpu_env);
                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
                    gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x3d: /* fucomip */
                if (!(s->cpuid_features & CPUID_CMOV)) {
                    goto illegal_op;
                }
                gen_update_cc_op(s);
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fucomi_ST0_FT0(cpu_env);
                gen_helper_fpop(cpu_env);
                set_cc_op(s, CC_OP_EFLAGS);
                break;
            case 0x3e: /* fcomip */
                if (!(s->cpuid_features & CPUID_CMOV)) {
                    goto illegal_op;
                }
                gen_update_cc_op(s);
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fcomi_ST0_FT0(cpu_env);
                gen_helper_fpop(cpu_env);
                set_cc_op(s, CC_OP_EFLAGS);
                break;
            case 0x10 ... 0x13: /* fcmovxx */
            case 0x18 ... 0x1b:
                {
                    int op1;
                    TCGLabel *l1;
                    static const uint8_t fcmov_cc[8] = {
                        (JCC_B << 1),
                        (JCC_Z << 1),
                        (JCC_BE << 1),
                        (JCC_P << 1),
                    };

                    if (!(s->cpuid_features & CPUID_CMOV)) {
                        goto illegal_op;
                    }
                    /* bit 3 of op distinguishes the negated condition;
                       the low bit of op1 is the condition's invert flag */
                    op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
                    l1 = gen_new_label();
                    /* skip the register move when the condition is false */
                    gen_jcc1_noeob(s, op1, l1);
                    gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
                    gen_set_label(l1);
                }
                break;
            default:
                goto unknown_op;
            }
        }
        break;
6335         /************************/
6336         /* string ops */
6337 
6338     case 0xa4: /* movsS */
6339     case 0xa5:
6340         ot = mo_b_d(b, dflag);
6341         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6342             gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6343         } else {
6344             gen_movs(s, ot);
6345         }
6346         break;
6347 
6348     case 0xaa: /* stosS */
6349     case 0xab:
6350         ot = mo_b_d(b, dflag);
6351         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6352             gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6353         } else {
6354             gen_stos(s, ot);
6355         }
6356         break;
6357     case 0xac: /* lodsS */
6358     case 0xad:
6359         ot = mo_b_d(b, dflag);
6360         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6361             gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6362         } else {
6363             gen_lods(s, ot);
6364         }
6365         break;
6366     case 0xae: /* scasS */
6367     case 0xaf:
6368         ot = mo_b_d(b, dflag);
6369         if (prefixes & PREFIX_REPNZ) {
6370             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6371         } else if (prefixes & PREFIX_REPZ) {
6372             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6373         } else {
6374             gen_scas(s, ot);
6375         }
6376         break;
6377 
6378     case 0xa6: /* cmpsS */
6379     case 0xa7:
6380         ot = mo_b_d(b, dflag);
6381         if (prefixes & PREFIX_REPNZ) {
6382             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6383         } else if (prefixes & PREFIX_REPZ) {
6384             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6385         } else {
6386             gen_cmps(s, ot);
6387         }
6388         break;
    case 0x6c: /* insS */
    case 0x6d:
        ot = mo_b_d32(b, dflag);
        /* port number comes from DX, zero-extended to 16 bits */
        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
        gen_check_io(s, ot, pc_start - s->cs_base,
                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_io_start();
        }
        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
            gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
            /* jump generated by gen_repz_ins */
        } else {
            gen_ins(s, ot);
            /* with icount, I/O must end the TB so the insn is the last
               one executed */
            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                gen_jmp(s, s->pc - s->cs_base);
            }
        }
        break;
    case 0x6e: /* outsS */
    case 0x6f:
        ot = mo_b_d32(b, dflag);
        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
        gen_check_io(s, ot, pc_start - s->cs_base,
                     svm_is_rep(prefixes) | 4);
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_io_start();
        }
        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
            gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
            /* jump generated by gen_repz_outs */
        } else {
            gen_outs(s, ot);
            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                gen_jmp(s, s->pc - s->cs_base);
            }
        }
        break;
6427 
6428         /************************/
6429         /* port I/O */
6430 
6431     case 0xe4:
6432     case 0xe5:
6433         ot = mo_b_d32(b, dflag);
6434         val = x86_ldub_code(env, s);
6435         tcg_gen_movi_tl(s->T0, val);
6436         gen_check_io(s, ot, pc_start - s->cs_base,
6437                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6438         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6439             gen_io_start();
6440         }
6441         tcg_gen_movi_i32(s->tmp2_i32, val);
6442         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6443         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6444         gen_bpt_io(s, s->tmp2_i32, ot);
6445         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6446             gen_jmp(s, s->pc - s->cs_base);
6447         }
6448         break;
6449     case 0xe6:
6450     case 0xe7:
6451         ot = mo_b_d32(b, dflag);
6452         val = x86_ldub_code(env, s);
6453         tcg_gen_movi_tl(s->T0, val);
6454         gen_check_io(s, ot, pc_start - s->cs_base,
6455                      svm_is_rep(prefixes));
6456         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6457 
6458         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6459             gen_io_start();
6460         }
6461         tcg_gen_movi_i32(s->tmp2_i32, val);
6462         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6463         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6464         gen_bpt_io(s, s->tmp2_i32, ot);
6465         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6466             gen_jmp(s, s->pc - s->cs_base);
6467         }
6468         break;
6469     case 0xec:
6470     case 0xed:
6471         ot = mo_b_d32(b, dflag);
6472         tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6473         gen_check_io(s, ot, pc_start - s->cs_base,
6474                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6475         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6476             gen_io_start();
6477         }
6478         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
6479         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6480         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6481         gen_bpt_io(s, s->tmp2_i32, ot);
6482         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6483             gen_jmp(s, s->pc - s->cs_base);
6484         }
6485         break;
6486     case 0xee:
6487     case 0xef:
6488         ot = mo_b_d32(b, dflag);
6489         tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6490         gen_check_io(s, ot, pc_start - s->cs_base,
6491                      svm_is_rep(prefixes));
6492         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6493 
6494         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6495             gen_io_start();
6496         }
6497         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
6498         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6499         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6500         gen_bpt_io(s, s->tmp2_i32, ot);
6501         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6502             gen_jmp(s, s->pc - s->cs_base);
6503         }
6504         break;
6505 
6506         /************************/
6507         /* control */
6508     case 0xc2: /* ret im */
6509         val = x86_ldsw_code(env, s);
6510         ot = gen_pop_T0(s);
6511         gen_stack_update(s, val + (1 << ot));
6512         /* Note that gen_pop_T0 uses a zero-extending load.  */
6513         gen_op_jmp_v(s->T0);
6514         gen_bnd_jmp(s);
6515         gen_jr(s, s->T0);
6516         break;
6517     case 0xc3: /* ret */
6518         ot = gen_pop_T0(s);
6519         gen_pop_update(s, ot);
6520         /* Note that gen_pop_T0 uses a zero-extending load.  */
6521         gen_op_jmp_v(s->T0);
6522         gen_bnd_jmp(s);
6523         gen_jr(s, s->T0);
6524         break;
6525     case 0xca: /* lret im */
6526         val = x86_ldsw_code(env, s);
6527     do_lret:
6528         if (s->pe && !s->vm86) {
6529             gen_update_cc_op(s);
6530             gen_jmp_im(s, pc_start - s->cs_base);
6531             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6532                                       tcg_const_i32(val));
6533         } else {
6534             gen_stack_A0(s);
6535             /* pop offset */
6536             gen_op_ld_v(s, dflag, s->T0, s->A0);
6537             /* NOTE: keeping EIP updated is not a problem in case of
6538                exception */
6539             gen_op_jmp_v(s->T0);
6540             /* pop selector */
6541             gen_add_A0_im(s, 1 << dflag);
6542             gen_op_ld_v(s, dflag, s->T0, s->A0);
6543             gen_op_movl_seg_T0_vm(s, R_CS);
6544             /* add stack offset */
6545             gen_stack_update(s, val + (2 << dflag));
6546         }
6547         gen_eob(s);
6548         break;
6549     case 0xcb: /* lret */
6550         val = 0;
6551         goto do_lret;
6552     case 0xcf: /* iret */
6553         gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
6554         if (!s->pe) {
6555             /* real mode */
6556             gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6557             set_cc_op(s, CC_OP_EFLAGS);
6558         } else if (s->vm86) {
6559             if (s->iopl != 3) {
6560                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6561             } else {
6562                 gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6563                 set_cc_op(s, CC_OP_EFLAGS);
6564             }
6565         } else {
6566             gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6567                                       tcg_const_i32(s->pc - s->cs_base));
6568             set_cc_op(s, CC_OP_EFLAGS);
6569         }
6570         gen_eob(s);
6571         break;
    case 0xe8: /* call im */
        {
            if (dflag != MO_16) {
                tval = (int32_t)insn_get(env, s, MO_32);
            } else {
                tval = (int16_t)insn_get(env, s, MO_16);
            }
            next_eip = s->pc - s->cs_base;
            /* target is relative to the next instruction */
            tval += next_eip;
            if (dflag == MO_16) {
                tval &= 0xffff;
            } else if (!CODE64(s)) {
                tval &= 0xffffffff;
            }
            /* push return address, then jump */
            tcg_gen_movi_tl(s->T0, next_eip);
            gen_push_v(s, s->T0);
            gen_bnd_jmp(s);
            gen_jmp(s, tval);
        }
        break;
    case 0x9a: /* lcall im */
        {
            unsigned int selector, offset;

            /* far immediate forms are invalid in 64-bit mode */
            if (CODE64(s))
                goto illegal_op;
            ot = dflag;
            offset = insn_get(env, s, ot);
            selector = insn_get(env, s, MO_16);

            tcg_gen_movi_tl(s->T0, selector);
            tcg_gen_movi_tl(s->T1, offset);
        }
        goto do_lcall;
    case 0xe9: /* jmp im */
        if (dflag != MO_16) {
            tval = (int32_t)insn_get(env, s, MO_32);
        } else {
            tval = (int16_t)insn_get(env, s, MO_16);
        }
        tval += s->pc - s->cs_base;
        if (dflag == MO_16) {
            tval &= 0xffff;
        } else if (!CODE64(s)) {
            tval &= 0xffffffff;
        }
        gen_bnd_jmp(s);
        gen_jmp(s, tval);
        break;
    case 0xea: /* ljmp im */
        {
            unsigned int selector, offset;

            if (CODE64(s))
                goto illegal_op;
            ot = dflag;
            offset = insn_get(env, s, ot);
            selector = insn_get(env, s, MO_16);

            tcg_gen_movi_tl(s->T0, selector);
            tcg_gen_movi_tl(s->T1, offset);
        }
        goto do_ljmp;
    case 0xeb: /* jmp Jb */
        tval = (int8_t)insn_get(env, s, MO_8);
        tval += s->pc - s->cs_base;
        if (dflag == MO_16) {
            tval &= 0xffff;
        }
        gen_jmp(s, tval);
        break;
    case 0x70 ... 0x7f: /* jcc Jb */
        tval = (int8_t)insn_get(env, s, MO_8);
        goto do_jcc;
    case 0x180 ... 0x18f: /* jcc Jv */
        if (dflag != MO_16) {
            tval = (int32_t)insn_get(env, s, MO_32);
        } else {
            tval = (int16_t)insn_get(env, s, MO_16);
        }
    do_jcc:
        next_eip = s->pc - s->cs_base;
        tval += next_eip;
        /* with a 16-bit operand size, EIP wraps at 64K */
        if (dflag == MO_16) {
            tval &= 0xffff;
        }
        gen_bnd_jmp(s);
        gen_jcc(s, b, tval, next_eip);
        break;
6661 
    case 0x190 ... 0x19f: /* setcc Gv */
        modrm = x86_ldub_code(env, s);
        /* Materialize the condition (from the opcode) into T0 as 0/1,
           then store the byte through the modrm-selected operand.  */
        gen_setcc1(s, b, s->T0);
        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
        break;
    case 0x140 ... 0x14f: /* cmov Gv, Ev */
        /* CMOVcc requires the CMOV CPUID feature bit.  */
        if (!(s->cpuid_features & CPUID_CMOV)) {
            goto illegal_op;
        }
        ot = dflag;
        modrm = x86_ldub_code(env, s);
        reg = ((modrm >> 3) & 7) | rex_r;
        gen_cmovcc1(env, s, ot, b, modrm, reg);
        break;
6676 
6677         /************************/
6678         /* flags */
6679     case 0x9c: /* pushf */
6680         gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
6681         if (s->vm86 && s->iopl != 3) {
6682             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6683         } else {
6684             gen_update_cc_op(s);
6685             gen_helper_read_eflags(s->T0, cpu_env);
6686             gen_push_v(s, s->T0);
6687         }
6688         break;
6689     case 0x9d: /* popf */
6690         gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
6691         if (s->vm86 && s->iopl != 3) {
6692             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6693         } else {
6694             ot = gen_pop_T0(s);
6695             if (s->cpl == 0) {
6696                 if (dflag != MO_16) {
6697                     gen_helper_write_eflags(cpu_env, s->T0,
6698                                             tcg_const_i32((TF_MASK | AC_MASK |
6699                                                            ID_MASK | NT_MASK |
6700                                                            IF_MASK |
6701                                                            IOPL_MASK)));
6702                 } else {
6703                     gen_helper_write_eflags(cpu_env, s->T0,
6704                                             tcg_const_i32((TF_MASK | AC_MASK |
6705                                                            ID_MASK | NT_MASK |
6706                                                            IF_MASK | IOPL_MASK)
6707                                                           & 0xffff));
6708                 }
6709             } else {
6710                 if (s->cpl <= s->iopl) {
6711                     if (dflag != MO_16) {
6712                         gen_helper_write_eflags(cpu_env, s->T0,
6713                                                 tcg_const_i32((TF_MASK |
6714                                                                AC_MASK |
6715                                                                ID_MASK |
6716                                                                NT_MASK |
6717                                                                IF_MASK)));
6718                     } else {
6719                         gen_helper_write_eflags(cpu_env, s->T0,
6720                                                 tcg_const_i32((TF_MASK |
6721                                                                AC_MASK |
6722                                                                ID_MASK |
6723                                                                NT_MASK |
6724                                                                IF_MASK)
6725                                                               & 0xffff));
6726                     }
6727                 } else {
6728                     if (dflag != MO_16) {
6729                         gen_helper_write_eflags(cpu_env, s->T0,
6730                                            tcg_const_i32((TF_MASK | AC_MASK |
6731                                                           ID_MASK | NT_MASK)));
6732                     } else {
6733                         gen_helper_write_eflags(cpu_env, s->T0,
6734                                            tcg_const_i32((TF_MASK | AC_MASK |
6735                                                           ID_MASK | NT_MASK)
6736                                                          & 0xffff));
6737                     }
6738                 }
6739             }
6740             gen_pop_update(s, ot);
6741             set_cc_op(s, CC_OP_EFLAGS);
6742             /* abort translation because TF/AC flag may change */
6743             gen_jmp_im(s, s->pc - s->cs_base);
6744             gen_eob(s);
6745         }
6746         break;
6747     case 0x9e: /* sahf */
6748         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6749             goto illegal_op;
6750         gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
6751         gen_compute_eflags(s);
6752         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6753         tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6754         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
6755         break;
6756     case 0x9f: /* lahf */
6757         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6758             goto illegal_op;
6759         gen_compute_eflags(s);
6760         /* Note: gen_compute_eflags() only gives the condition codes */
6761         tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
6762         gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
6763         break;
6764     case 0xf5: /* cmc */
6765         gen_compute_eflags(s);
6766         tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6767         break;
6768     case 0xf8: /* clc */
6769         gen_compute_eflags(s);
6770         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6771         break;
6772     case 0xf9: /* stc */
6773         gen_compute_eflags(s);
6774         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6775         break;
6776     case 0xfc: /* cld */
6777         tcg_gen_movi_i32(s->tmp2_i32, 1);
6778         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6779         break;
6780     case 0xfd: /* std */
6781         tcg_gen_movi_i32(s->tmp2_i32, -1);
6782         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6783         break;
6784 
6785         /************************/
6786         /* bit operations */
6787     case 0x1ba: /* bt/bts/btr/btc Gv, im */
6788         ot = dflag;
6789         modrm = x86_ldub_code(env, s);
6790         op = (modrm >> 3) & 7;
6791         mod = (modrm >> 6) & 3;
6792         rm = (modrm & 7) | REX_B(s);
6793         if (mod != 3) {
6794             s->rip_offset = 1;
6795             gen_lea_modrm(env, s, modrm);
6796             if (!(s->prefix & PREFIX_LOCK)) {
6797                 gen_op_ld_v(s, ot, s->T0, s->A0);
6798             }
6799         } else {
6800             gen_op_mov_v_reg(s, ot, s->T0, rm);
6801         }
6802         /* load shift */
6803         val = x86_ldub_code(env, s);
6804         tcg_gen_movi_tl(s->T1, val);
6805         if (op < 4)
6806             goto unknown_op;
6807         op -= 4;
6808         goto bt_op;
6809     case 0x1a3: /* bt Gv, Ev */
6810         op = 0;
6811         goto do_btx;
6812     case 0x1ab: /* bts */
6813         op = 1;
6814         goto do_btx;
6815     case 0x1b3: /* btr */
6816         op = 2;
6817         goto do_btx;
6818     case 0x1bb: /* btc */
6819         op = 3;
6820     do_btx:
6821         ot = dflag;
6822         modrm = x86_ldub_code(env, s);
6823         reg = ((modrm >> 3) & 7) | rex_r;
6824         mod = (modrm >> 6) & 3;
6825         rm = (modrm & 7) | REX_B(s);
6826         gen_op_mov_v_reg(s, MO_32, s->T1, reg);
6827         if (mod != 3) {
6828             AddressParts a = gen_lea_modrm_0(env, s, modrm);
6829             /* specific case: we need to add a displacement */
6830             gen_exts(ot, s->T1);
6831             tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
6832             tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
6833             tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
6834             gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
6835             if (!(s->prefix & PREFIX_LOCK)) {
6836                 gen_op_ld_v(s, ot, s->T0, s->A0);
6837             }
6838         } else {
6839             gen_op_mov_v_reg(s, ot, s->T0, rm);
6840         }
6841     bt_op:
6842         tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
6843         tcg_gen_movi_tl(s->tmp0, 1);
6844         tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
6845         if (s->prefix & PREFIX_LOCK) {
6846             switch (op) {
6847             case 0: /* bt */
6848                 /* Needs no atomic ops; we surpressed the normal
6849                    memory load for LOCK above so do it now.  */
6850                 gen_op_ld_v(s, ot, s->T0, s->A0);
6851                 break;
6852             case 1: /* bts */
6853                 tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
6854                                            s->mem_index, ot | MO_LE);
6855                 break;
6856             case 2: /* btr */
6857                 tcg_gen_not_tl(s->tmp0, s->tmp0);
6858                 tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
6859                                             s->mem_index, ot | MO_LE);
6860                 break;
6861             default:
6862             case 3: /* btc */
6863                 tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
6864                                             s->mem_index, ot | MO_LE);
6865                 break;
6866             }
6867             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6868         } else {
6869             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6870             switch (op) {
6871             case 0: /* bt */
6872                 /* Data already loaded; nothing to do.  */
6873                 break;
6874             case 1: /* bts */
6875                 tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
6876                 break;
6877             case 2: /* btr */
6878                 tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
6879                 break;
6880             default:
6881             case 3: /* btc */
6882                 tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
6883                 break;
6884             }
6885             if (op != 0) {
6886                 if (mod != 3) {
6887                     gen_op_st_v(s, ot, s->T0, s->A0);
6888                 } else {
6889                     gen_op_mov_reg_v(s, ot, rm, s->T0);
6890                 }
6891             }
6892         }
6893 
6894         /* Delay all CC updates until after the store above.  Note that
6895            C is the result of the test, Z is unchanged, and the others
6896            are all undefined.  */
6897         switch (s->cc_op) {
6898         case CC_OP_MULB ... CC_OP_MULQ:
6899         case CC_OP_ADDB ... CC_OP_ADDQ:
6900         case CC_OP_ADCB ... CC_OP_ADCQ:
6901         case CC_OP_SUBB ... CC_OP_SUBQ:
6902         case CC_OP_SBBB ... CC_OP_SBBQ:
6903         case CC_OP_LOGICB ... CC_OP_LOGICQ:
6904         case CC_OP_INCB ... CC_OP_INCQ:
6905         case CC_OP_DECB ... CC_OP_DECQ:
6906         case CC_OP_SHLB ... CC_OP_SHLQ:
6907         case CC_OP_SARB ... CC_OP_SARQ:
6908         case CC_OP_BMILGB ... CC_OP_BMILGQ:
6909             /* Z was going to be computed from the non-zero status of CC_DST.
6910                We can get that same Z value (and the new C value) by leaving
6911                CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
6912                same width.  */
6913             tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
6914             set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
6915             break;
6916         default:
6917             /* Otherwise, generate EFLAGS and replace the C bit.  */
6918             gen_compute_eflags(s);
6919             tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
6920                                ctz32(CC_C), 1);
6921             break;
6922         }
6923         break;
    case 0x1bc: /* bsf / tzcnt */
    case 0x1bd: /* bsr / lzcnt */
        ot = dflag;
        modrm = x86_ldub_code(env, s);
        reg = ((modrm >> 3) & 7) | rex_r;
        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
        /* Zero-extend so the count sees only the ot-sized operand.  */
        gen_extu(ot, s->T0);

        /* Note that lzcnt and tzcnt are in different extensions.  */
        if ((prefixes & PREFIX_REPZ)
            && (b & 1
                ? s->cpuid_ext3_features & CPUID_EXT3_ABM
                : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
            int size = 8 << ot;
            /* For lzcnt/tzcnt, C bit is defined related to the input. */
            tcg_gen_mov_tl(cpu_cc_src, s->T0);
            if (b & 1) {
                /* For lzcnt, reduce the target_ulong result by the
                   number of zeros that we expect to find at the top.  */
                tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
                tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
            } else {
                /* For tzcnt, a zero input must return the operand size.  */
                tcg_gen_ctzi_tl(s->T0, s->T0, size);
            }
            /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
            gen_op_update1_cc(s);
            set_cc_op(s, CC_OP_BMILGB + ot);
        } else {
            /* For bsr/bsf, only the Z bit is defined and it is related
               to the input and not the result.  */
            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
            set_cc_op(s, CC_OP_LOGICB + ot);

            /* ??? The manual says that the output is undefined when the
               input is zero, but real hardware leaves it unchanged, and
               real programs appear to depend on that.  Accomplish this
               by passing the output as the value to return upon zero.  */
            if (b & 1) {
                /* For bsr, return the bit index of the first 1 bit,
                   not the count of leading zeros.  */
                tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
                tcg_gen_clz_tl(s->T0, s->T0, s->T1);
                tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
            } else {
                tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
            }
        }
        gen_op_mov_reg_v(s, ot, reg, s->T0);
        break;
6974         /************************/
6975         /* bcd */
6976     case 0x27: /* daa */
6977         if (CODE64(s))
6978             goto illegal_op;
6979         gen_update_cc_op(s);
6980         gen_helper_daa(cpu_env);
6981         set_cc_op(s, CC_OP_EFLAGS);
6982         break;
6983     case 0x2f: /* das */
6984         if (CODE64(s))
6985             goto illegal_op;
6986         gen_update_cc_op(s);
6987         gen_helper_das(cpu_env);
6988         set_cc_op(s, CC_OP_EFLAGS);
6989         break;
6990     case 0x37: /* aaa */
6991         if (CODE64(s))
6992             goto illegal_op;
6993         gen_update_cc_op(s);
6994         gen_helper_aaa(cpu_env);
6995         set_cc_op(s, CC_OP_EFLAGS);
6996         break;
6997     case 0x3f: /* aas */
6998         if (CODE64(s))
6999             goto illegal_op;
7000         gen_update_cc_op(s);
7001         gen_helper_aas(cpu_env);
7002         set_cc_op(s, CC_OP_EFLAGS);
7003         break;
7004     case 0xd4: /* aam */
7005         if (CODE64(s))
7006             goto illegal_op;
7007         val = x86_ldub_code(env, s);
7008         if (val == 0) {
7009             gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
7010         } else {
7011             gen_helper_aam(cpu_env, tcg_const_i32(val));
7012             set_cc_op(s, CC_OP_LOGICB);
7013         }
7014         break;
7015     case 0xd5: /* aad */
7016         if (CODE64(s))
7017             goto illegal_op;
7018         val = x86_ldub_code(env, s);
7019         gen_helper_aad(cpu_env, tcg_const_i32(val));
7020         set_cc_op(s, CC_OP_LOGICB);
7021         break;
7022         /************************/
7023         /* misc */
7024     case 0x90: /* nop */
7025         /* XXX: correct lock test for all insn */
7026         if (prefixes & PREFIX_LOCK) {
7027             goto illegal_op;
7028         }
7029         /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
7030         if (REX_B(s)) {
7031             goto do_xchg_reg_eax;
7032         }
7033         if (prefixes & PREFIX_REPZ) {
7034             gen_update_cc_op(s);
7035             gen_jmp_im(s, pc_start - s->cs_base);
7036             gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
7037             s->base.is_jmp = DISAS_NORETURN;
7038         }
7039         break;
7040     case 0x9b: /* fwait */
7041         if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
7042             (HF_MP_MASK | HF_TS_MASK)) {
7043             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7044         } else {
7045             gen_helper_fwait(cpu_env);
7046         }
7047         break;
7048     case 0xcc: /* int3 */
7049         gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
7050         break;
7051     case 0xcd: /* int N */
7052         val = x86_ldub_code(env, s);
7053         if (s->vm86 && s->iopl != 3) {
7054             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7055         } else {
7056             gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
7057         }
7058         break;
7059     case 0xce: /* into */
7060         if (CODE64(s))
7061             goto illegal_op;
7062         gen_update_cc_op(s);
7063         gen_jmp_im(s, pc_start - s->cs_base);
7064         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7065         break;
7066 #ifdef WANT_ICEBP
7067     case 0xf1: /* icebp (undocumented, exits to external debugger) */
7068         gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
7069         gen_debug(s, pc_start - s->cs_base);
7070         break;
7071 #endif
7072     case 0xfa: /* cli */
7073         if (!s->vm86) {
7074             if (s->cpl <= s->iopl) {
7075                 gen_helper_cli(cpu_env);
7076             } else {
7077                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7078             }
7079         } else {
7080             if (s->iopl == 3) {
7081                 gen_helper_cli(cpu_env);
7082             } else {
7083                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7084             }
7085         }
7086         break;
7087     case 0xfb: /* sti */
7088         if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
7089             gen_helper_sti(cpu_env);
7090             /* interruptions are enabled only the first insn after sti */
7091             gen_jmp_im(s, s->pc - s->cs_base);
7092             gen_eob_inhibit_irq(s, true);
7093         } else {
7094             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7095         }
7096         break;
    case 0x62: /* bound */
        if (CODE64(s))
            goto illegal_op;
        ot = dflag;
        modrm = x86_ldub_code(env, s);
        reg = (modrm >> 3) & 7;
        mod = (modrm >> 6) & 3;
        /* BOUND requires a memory operand holding the bounds pair.  */
        if (mod == 3)
            goto illegal_op;
        gen_op_mov_v_reg(s, ot, s->T0, reg);
        gen_lea_modrm(env, s, modrm);
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        if (ot == MO_16) {
            gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
        } else {
            gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
        }
        break;
    case 0x1c8 ... 0x1cf: /* bswap reg */
        reg = (b & 7) | REX_B(s);
#ifdef TARGET_X86_64
        if (dflag == MO_64) {
            gen_op_mov_v_reg(s, MO_64, s->T0, reg);
            tcg_gen_bswap64_i64(s->T0, s->T0);
            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
        } else
#endif
        {
            gen_op_mov_v_reg(s, MO_32, s->T0, reg);
            tcg_gen_ext32u_tl(s->T0, s->T0);
            tcg_gen_bswap32_tl(s->T0, s->T0);
            gen_op_mov_reg_v(s, MO_32, reg, s->T0);
        }
        break;
    case 0xd6: /* salc */
        if (CODE64(s))
            goto illegal_op;
        /* AL = CF ? 0xff : 0x00 (negate the 0/1 carry value).  */
        gen_compute_eflags_c(s, s->T0);
        tcg_gen_neg_tl(s->T0, s->T0);
        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
        break;
    case 0xe0: /* loopnz */
    case 0xe1: /* loopz */
    case 0xe2: /* loop */
    case 0xe3: /* jecxz */
        {
            /* l1: branch taken, l2: common exit, l3: fall through.  */
            TCGLabel *l1, *l2, *l3;

            tval = (int8_t)insn_get(env, s, MO_8);
            next_eip = s->pc - s->cs_base;
            tval += next_eip;
            if (dflag == MO_16) {
                tval &= 0xffff;
            }

            l1 = gen_new_label();
            l2 = gen_new_label();
            l3 = gen_new_label();
            gen_update_cc_op(s);
            b &= 3;
            switch(b) {
            case 0: /* loopnz */
            case 1: /* loopz */
                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
                gen_op_jz_ecx(s, s->aflag, l3);
                gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
                break;
            case 2: /* loop */
                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
                gen_op_jnz_ecx(s, s->aflag, l1);
                break;
            default:
            case 3: /* jcxz */
                gen_op_jz_ecx(s, s->aflag, l1);
                break;
            }

            gen_set_label(l3);
            gen_jmp_im(s, next_eip);
            tcg_gen_br(l2);

            gen_set_label(l1);
            gen_jmp_im(s, tval);
            gen_set_label(l2);
            gen_eob(s);
        }
        break;
    case 0x130: /* wrmsr */
    case 0x132: /* rdmsr */
        /* MSR access is privileged.  */
        if (s->cpl != 0) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_update_cc_op(s);
            gen_jmp_im(s, pc_start - s->cs_base);
            if (b & 2) {
                gen_helper_rdmsr(cpu_env);
            } else {
                gen_helper_wrmsr(cpu_env);
            }
        }
        break;
    case 0x131: /* rdtsc */
        gen_update_cc_op(s);
        gen_jmp_im(s, pc_start - s->cs_base);
        /* With icount, reading the TSC is treated as an I/O operation.  */
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_io_start();
        }
        gen_helper_rdtsc(cpu_env);
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_jmp(s, s->pc - s->cs_base);
        }
        break;
    case 0x133: /* rdpmc */
        gen_update_cc_op(s);
        gen_jmp_im(s, pc_start - s->cs_base);
        gen_helper_rdpmc(cpu_env);
        break;
    case 0x134: /* sysenter */
        /* For Intel SYSENTER is valid on 64-bit */
        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
            goto illegal_op;
        if (!s->pe) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_helper_sysenter(cpu_env);
            gen_eob(s);
        }
        break;
    case 0x135: /* sysexit */
        /* For Intel SYSEXIT is valid on 64-bit */
        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
            goto illegal_op;
        if (!s->pe) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
            gen_eob(s);
        }
        break;
#ifdef TARGET_X86_64
    case 0x105: /* syscall */
        /* XXX: is it usable in real mode ? */
        gen_update_cc_op(s);
        gen_jmp_im(s, pc_start - s->cs_base);
        gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
        /* TF handling for the syscall insn is different. The TF bit is
           checked after the syscall insn completes. This allows #DB to not be
           generated after one has entered CPL0 if TF is set in FMASK.  */
        gen_eob_worker(s, false, true);
        break;
    case 0x107: /* sysret */
        if (!s->pe) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
            /* condition codes are modified only in long mode */
            if (s->lma) {
                set_cc_op(s, CC_OP_EFLAGS);
            }
            /* TF handling for the sysret insn is different. The TF bit is
               checked after the sysret insn completes. This allows #DB to be
               generated "as if" the syscall insn in userspace has just
               completed.  */
            gen_eob_worker(s, false, true);
        }
        break;
#endif
    case 0x1a2: /* cpuid */
        gen_update_cc_op(s);
        gen_jmp_im(s, pc_start - s->cs_base);
        gen_helper_cpuid(cpu_env);
        break;
    case 0xf4: /* hlt */
        /* HLT is privileged.  */
        if (s->cpl != 0) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_update_cc_op(s);
            gen_jmp_im(s, pc_start - s->cs_base);
            gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
            s->base.is_jmp = DISAS_NORETURN;
        }
        break;
    case 0x100:
        /* 0F 00: sldt/str/lldt/ltr/verr/verw, selected by reg field.  */
        modrm = x86_ldub_code(env, s);
        mod = (modrm >> 6) & 3;
        op = (modrm >> 3) & 7;
        switch(op) {
        case 0: /* sldt */
            if (!s->pe || s->vm86)
                goto illegal_op;
            gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
            tcg_gen_ld32u_tl(s->T0, cpu_env,
                             offsetof(CPUX86State, ldt.selector));
            /* Register destination uses the operand size; a memory
               destination is always 16 bits.  */
            ot = mod == 3 ? dflag : MO_16;
            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
            break;
        case 2: /* lldt */
            if (!s->pe || s->vm86)
                goto illegal_op;
            /* Loading LDTR is privileged.  */
            if (s->cpl != 0) {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
            } else {
                gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                gen_helper_lldt(cpu_env, s->tmp2_i32);
            }
            break;
        case 1: /* str */
            if (!s->pe || s->vm86)
                goto illegal_op;
            gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
            tcg_gen_ld32u_tl(s->T0, cpu_env,
                             offsetof(CPUX86State, tr.selector));
            ot = mod == 3 ? dflag : MO_16;
            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
            break;
        case 3: /* ltr */
            if (!s->pe || s->vm86)
                goto illegal_op;
            /* Loading the task register is privileged.  */
            if (s->cpl != 0) {
                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
            } else {
                gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                gen_helper_ltr(cpu_env, s->tmp2_i32);
            }
            break;
        case 4: /* verr */
        case 5: /* verw */
            if (!s->pe || s->vm86)
                goto illegal_op;
            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
            gen_update_cc_op(s);
            if (op == 4) {
                gen_helper_verr(cpu_env, s->T0);
            } else {
                gen_helper_verw(cpu_env, s->T0);
            }
            set_cc_op(s, CC_OP_EFLAGS);
            break;
        default:
            goto unknown_op;
        }
        break;
7343 
7344     case 0x101:
7345         modrm = x86_ldub_code(env, s);
7346         switch (modrm) {
7347         CASE_MODRM_MEM_OP(0): /* sgdt */
7348             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
7349             gen_lea_modrm(env, s, modrm);
7350             tcg_gen_ld32u_tl(s->T0,
7351                              cpu_env, offsetof(CPUX86State, gdt.limit));
7352             gen_op_st_v(s, MO_16, s->T0, s->A0);
7353             gen_add_A0_im(s, 2);
7354             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7355             if (dflag == MO_16) {
7356                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7357             }
7358             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7359             break;
7360 
7361         case 0xc8: /* monitor */
7362             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7363                 goto illegal_op;
7364             }
7365             gen_update_cc_op(s);
7366             gen_jmp_im(s, pc_start - s->cs_base);
7367             tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7368             gen_extu(s->aflag, s->A0);
7369             gen_add_A0_ds_seg(s);
7370             gen_helper_monitor(cpu_env, s->A0);
7371             break;
7372 
7373         case 0xc9: /* mwait */
7374             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7375                 goto illegal_op;
7376             }
7377             gen_update_cc_op(s);
7378             gen_jmp_im(s, pc_start - s->cs_base);
7379             gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7380             gen_eob(s);
7381             break;
7382 
7383         case 0xca: /* clac */
7384             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7385                 || s->cpl != 0) {
7386                 goto illegal_op;
7387             }
7388             gen_helper_clac(cpu_env);
7389             gen_jmp_im(s, s->pc - s->cs_base);
7390             gen_eob(s);
7391             break;
7392 
7393         case 0xcb: /* stac */
7394             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7395                 || s->cpl != 0) {
7396                 goto illegal_op;
7397             }
7398             gen_helper_stac(cpu_env);
7399             gen_jmp_im(s, s->pc - s->cs_base);
7400             gen_eob(s);
7401             break;
7402 
7403         CASE_MODRM_MEM_OP(1): /* sidt */
7404             gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
7405             gen_lea_modrm(env, s, modrm);
7406             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
7407             gen_op_st_v(s, MO_16, s->T0, s->A0);
7408             gen_add_A0_im(s, 2);
7409             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7410             if (dflag == MO_16) {
7411                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7412             }
7413             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7414             break;
7415 
7416         case 0xd0: /* xgetbv */
7417             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7418                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7419                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7420                 goto illegal_op;
7421             }
7422             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7423             gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
7424             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7425             break;
7426 
7427         case 0xd1: /* xsetbv */
7428             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7429                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7430                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7431                 goto illegal_op;
7432             }
7433             if (s->cpl != 0) {
7434                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7435                 break;
7436             }
7437             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7438                                   cpu_regs[R_EDX]);
7439             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7440             gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
7441             /* End TB because translation flags may change.  */
7442             gen_jmp_im(s, s->pc - s->cs_base);
7443             gen_eob(s);
7444             break;
7445 
7446         case 0xd8: /* VMRUN */
7447             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7448                 goto illegal_op;
7449             }
7450             if (s->cpl != 0) {
7451                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7452                 break;
7453             }
7454             gen_update_cc_op(s);
7455             gen_jmp_im(s, pc_start - s->cs_base);
7456             gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7457                              tcg_const_i32(s->pc - pc_start));
7458             tcg_gen_exit_tb(NULL, 0);
7459             s->base.is_jmp = DISAS_NORETURN;
7460             break;
7461 
7462         case 0xd9: /* VMMCALL */
7463             if (!(s->flags & HF_SVME_MASK)) {
7464                 goto illegal_op;
7465             }
7466             gen_update_cc_op(s);
7467             gen_jmp_im(s, pc_start - s->cs_base);
7468             gen_helper_vmmcall(cpu_env);
7469             break;
7470 
7471         case 0xda: /* VMLOAD */
7472             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7473                 goto illegal_op;
7474             }
7475             if (s->cpl != 0) {
7476                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7477                 break;
7478             }
7479             gen_update_cc_op(s);
7480             gen_jmp_im(s, pc_start - s->cs_base);
7481             gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7482             break;
7483 
7484         case 0xdb: /* VMSAVE */
7485             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7486                 goto illegal_op;
7487             }
7488             if (s->cpl != 0) {
7489                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7490                 break;
7491             }
7492             gen_update_cc_op(s);
7493             gen_jmp_im(s, pc_start - s->cs_base);
7494             gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7495             break;
7496 
7497         case 0xdc: /* STGI */
7498             if ((!(s->flags & HF_SVME_MASK)
7499                    && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7500                 || !s->pe) {
7501                 goto illegal_op;
7502             }
7503             if (s->cpl != 0) {
7504                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7505                 break;
7506             }
7507             gen_update_cc_op(s);
7508             gen_helper_stgi(cpu_env);
7509             gen_jmp_im(s, s->pc - s->cs_base);
7510             gen_eob(s);
7511             break;
7512 
7513         case 0xdd: /* CLGI */
7514             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7515                 goto illegal_op;
7516             }
7517             if (s->cpl != 0) {
7518                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7519                 break;
7520             }
7521             gen_update_cc_op(s);
7522             gen_jmp_im(s, pc_start - s->cs_base);
7523             gen_helper_clgi(cpu_env);
7524             break;
7525 
7526         case 0xde: /* SKINIT */
7527             if ((!(s->flags & HF_SVME_MASK)
7528                  && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7529                 || !s->pe) {
7530                 goto illegal_op;
7531             }
7532             gen_update_cc_op(s);
7533             gen_jmp_im(s, pc_start - s->cs_base);
7534             gen_helper_skinit(cpu_env);
7535             break;
7536 
7537         case 0xdf: /* INVLPGA */
7538             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7539                 goto illegal_op;
7540             }
7541             if (s->cpl != 0) {
7542                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7543                 break;
7544             }
7545             gen_update_cc_op(s);
7546             gen_jmp_im(s, pc_start - s->cs_base);
7547             gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
7548             break;
7549 
7550         CASE_MODRM_MEM_OP(2): /* lgdt */
7551             if (s->cpl != 0) {
7552                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7553                 break;
7554             }
7555             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
7556             gen_lea_modrm(env, s, modrm);
7557             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7558             gen_add_A0_im(s, 2);
7559             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7560             if (dflag == MO_16) {
7561                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7562             }
7563             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7564             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7565             break;
7566 
7567         CASE_MODRM_MEM_OP(3): /* lidt */
7568             if (s->cpl != 0) {
7569                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7570                 break;
7571             }
7572             gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
7573             gen_lea_modrm(env, s, modrm);
7574             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7575             gen_add_A0_im(s, 2);
7576             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7577             if (dflag == MO_16) {
7578                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7579             }
7580             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7581             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
7582             break;
7583 
7584         CASE_MODRM_OP(4): /* smsw */
7585             gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
7586             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
7587             /*
7588              * In 32-bit mode, the higher 16 bits of the destination
7589              * register are undefined.  In practice CR0[31:0] is stored
7590              * just like in 64-bit mode.
7591              */
7592             mod = (modrm >> 6) & 3;
7593             ot = (mod != 3 ? MO_16 : s->dflag);
7594             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7595             break;
7596         case 0xee: /* rdpkru */
7597             if (prefixes & PREFIX_LOCK) {
7598                 goto illegal_op;
7599             }
7600             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7601             gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
7602             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7603             break;
7604         case 0xef: /* wrpkru */
7605             if (prefixes & PREFIX_LOCK) {
7606                 goto illegal_op;
7607             }
7608             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7609                                   cpu_regs[R_EDX]);
7610             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7611             gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
7612             break;
7613         CASE_MODRM_OP(6): /* lmsw */
7614             if (s->cpl != 0) {
7615                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7616                 break;
7617             }
7618             gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7619             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7620             gen_helper_lmsw(cpu_env, s->T0);
7621             gen_jmp_im(s, s->pc - s->cs_base);
7622             gen_eob(s);
7623             break;
7624 
7625         CASE_MODRM_MEM_OP(7): /* invlpg */
7626             if (s->cpl != 0) {
7627                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7628                 break;
7629             }
7630             gen_update_cc_op(s);
7631             gen_jmp_im(s, pc_start - s->cs_base);
7632             gen_lea_modrm(env, s, modrm);
7633             gen_helper_invlpg(cpu_env, s->A0);
7634             gen_jmp_im(s, s->pc - s->cs_base);
7635             gen_eob(s);
7636             break;
7637 
7638         case 0xf8: /* swapgs */
7639 #ifdef TARGET_X86_64
7640             if (CODE64(s)) {
7641                 if (s->cpl != 0) {
7642                     gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7643                 } else {
7644                     tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
7645                     tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7646                                   offsetof(CPUX86State, kernelgsbase));
7647                     tcg_gen_st_tl(s->T0, cpu_env,
7648                                   offsetof(CPUX86State, kernelgsbase));
7649                 }
7650                 break;
7651             }
7652 #endif
7653             goto illegal_op;
7654 
7655         case 0xf9: /* rdtscp */
7656             if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7657                 goto illegal_op;
7658             }
7659             gen_update_cc_op(s);
7660             gen_jmp_im(s, pc_start - s->cs_base);
7661             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7662                 gen_io_start();
7663             }
7664             gen_helper_rdtscp(cpu_env);
7665             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7666                 gen_jmp(s, s->pc - s->cs_base);
7667             }
7668             break;
7669 
7670         default:
7671             goto unknown_op;
7672         }
7673         break;
7674 
7675     case 0x108: /* invd */
7676     case 0x109: /* wbinvd */
7677         if (s->cpl != 0) {
7678             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7679         } else {
7680             gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7681             /* nothing to do */
7682         }
7683         break;
7684     case 0x63: /* arpl or movslS (x86_64) */
7685 #ifdef TARGET_X86_64
7686         if (CODE64(s)) {
7687             int d_ot;
7688             /* d_ot is the size of the destination */
7689             d_ot = dflag;
7690 
7691             modrm = x86_ldub_code(env, s);
7692             reg = ((modrm >> 3) & 7) | rex_r;
7693             mod = (modrm >> 6) & 3;
7694             rm = (modrm & 7) | REX_B(s);
7695 
7696             if (mod == 3) {
7697                 gen_op_mov_v_reg(s, MO_32, s->T0, rm);
7698                 /* sign extend */
7699                 if (d_ot == MO_64) {
7700                     tcg_gen_ext32s_tl(s->T0, s->T0);
7701                 }
7702                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7703             } else {
7704                 gen_lea_modrm(env, s, modrm);
7705                 gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
7706                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7707             }
7708         } else
7709 #endif
7710         {
7711             TCGLabel *label1;
7712             TCGv t0, t1, t2, a0;
7713 
7714             if (!s->pe || s->vm86)
7715                 goto illegal_op;
7716             t0 = tcg_temp_local_new();
7717             t1 = tcg_temp_local_new();
7718             t2 = tcg_temp_local_new();
7719             ot = MO_16;
7720             modrm = x86_ldub_code(env, s);
7721             reg = (modrm >> 3) & 7;
7722             mod = (modrm >> 6) & 3;
7723             rm = modrm & 7;
7724             if (mod != 3) {
7725                 gen_lea_modrm(env, s, modrm);
7726                 gen_op_ld_v(s, ot, t0, s->A0);
7727                 a0 = tcg_temp_local_new();
7728                 tcg_gen_mov_tl(a0, s->A0);
7729             } else {
7730                 gen_op_mov_v_reg(s, ot, t0, rm);
7731                 a0 = NULL;
7732             }
7733             gen_op_mov_v_reg(s, ot, t1, reg);
7734             tcg_gen_andi_tl(s->tmp0, t0, 3);
7735             tcg_gen_andi_tl(t1, t1, 3);
7736             tcg_gen_movi_tl(t2, 0);
7737             label1 = gen_new_label();
7738             tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
7739             tcg_gen_andi_tl(t0, t0, ~3);
7740             tcg_gen_or_tl(t0, t0, t1);
7741             tcg_gen_movi_tl(t2, CC_Z);
7742             gen_set_label(label1);
7743             if (mod != 3) {
7744                 gen_op_st_v(s, ot, t0, a0);
7745                 tcg_temp_free(a0);
7746            } else {
7747                 gen_op_mov_reg_v(s, ot, rm, t0);
7748             }
7749             gen_compute_eflags(s);
7750             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7751             tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7752             tcg_temp_free(t0);
7753             tcg_temp_free(t1);
7754             tcg_temp_free(t2);
7755         }
7756         break;
7757     case 0x102: /* lar */
7758     case 0x103: /* lsl */
7759         {
7760             TCGLabel *label1;
7761             TCGv t0;
7762             if (!s->pe || s->vm86)
7763                 goto illegal_op;
7764             ot = dflag != MO_16 ? MO_32 : MO_16;
7765             modrm = x86_ldub_code(env, s);
7766             reg = ((modrm >> 3) & 7) | rex_r;
7767             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7768             t0 = tcg_temp_local_new();
7769             gen_update_cc_op(s);
7770             if (b == 0x102) {
7771                 gen_helper_lar(t0, cpu_env, s->T0);
7772             } else {
7773                 gen_helper_lsl(t0, cpu_env, s->T0);
7774             }
7775             tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
7776             label1 = gen_new_label();
7777             tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
7778             gen_op_mov_reg_v(s, ot, reg, t0);
7779             gen_set_label(label1);
7780             set_cc_op(s, CC_OP_EFLAGS);
7781             tcg_temp_free(t0);
7782         }
7783         break;
7784     case 0x118:
7785         modrm = x86_ldub_code(env, s);
7786         mod = (modrm >> 6) & 3;
7787         op = (modrm >> 3) & 7;
7788         switch(op) {
7789         case 0: /* prefetchnta */
7790         case 1: /* prefetcht0 */
7791         case 2: /* prefetcht1 */
7792         case 3: /* prefetcht2 */
7793             if (mod == 3)
7794                 goto illegal_op;
7795             gen_nop_modrm(env, s, modrm);
7796             /* nothing more to do */
7797             break;
7798         default: /* nop (multi byte) */
7799             gen_nop_modrm(env, s, modrm);
7800             break;
7801         }
7802         break;
7803     case 0x11a:
7804         modrm = x86_ldub_code(env, s);
7805         if (s->flags & HF_MPX_EN_MASK) {
7806             mod = (modrm >> 6) & 3;
7807             reg = ((modrm >> 3) & 7) | rex_r;
7808             if (prefixes & PREFIX_REPZ) {
7809                 /* bndcl */
7810                 if (reg >= 4
7811                     || (prefixes & PREFIX_LOCK)
7812                     || s->aflag == MO_16) {
7813                     goto illegal_op;
7814                 }
7815                 gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7816             } else if (prefixes & PREFIX_REPNZ) {
7817                 /* bndcu */
7818                 if (reg >= 4
7819                     || (prefixes & PREFIX_LOCK)
7820                     || s->aflag == MO_16) {
7821                     goto illegal_op;
7822                 }
7823                 TCGv_i64 notu = tcg_temp_new_i64();
7824                 tcg_gen_not_i64(notu, cpu_bndu[reg]);
7825                 gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7826                 tcg_temp_free_i64(notu);
7827             } else if (prefixes & PREFIX_DATA) {
7828                 /* bndmov -- from reg/mem */
7829                 if (reg >= 4 || s->aflag == MO_16) {
7830                     goto illegal_op;
7831                 }
7832                 if (mod == 3) {
7833                     int reg2 = (modrm & 7) | REX_B(s);
7834                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7835                         goto illegal_op;
7836                     }
7837                     if (s->flags & HF_MPX_IU_MASK) {
7838                         tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7839                         tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7840                     }
7841                 } else {
7842                     gen_lea_modrm(env, s, modrm);
7843                     if (CODE64(s)) {
7844                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7845                                             s->mem_index, MO_LEQ);
7846                         tcg_gen_addi_tl(s->A0, s->A0, 8);
7847                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7848                                             s->mem_index, MO_LEQ);
7849                     } else {
7850                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7851                                             s->mem_index, MO_LEUL);
7852                         tcg_gen_addi_tl(s->A0, s->A0, 4);
7853                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7854                                             s->mem_index, MO_LEUL);
7855                     }
7856                     /* bnd registers are now in-use */
7857                     gen_set_hflag(s, HF_MPX_IU_MASK);
7858                 }
7859             } else if (mod != 3) {
7860                 /* bndldx */
7861                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7862                 if (reg >= 4
7863                     || (prefixes & PREFIX_LOCK)
7864                     || s->aflag == MO_16
7865                     || a.base < -1) {
7866                     goto illegal_op;
7867                 }
7868                 if (a.base >= 0) {
7869                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7870                 } else {
7871                     tcg_gen_movi_tl(s->A0, 0);
7872                 }
7873                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7874                 if (a.index >= 0) {
7875                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7876                 } else {
7877                     tcg_gen_movi_tl(s->T0, 0);
7878                 }
7879                 if (CODE64(s)) {
7880                     gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
7881                     tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7882                                    offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7883                 } else {
7884                     gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
7885                     tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7886                     tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7887                 }
7888                 gen_set_hflag(s, HF_MPX_IU_MASK);
7889             }
7890         }
7891         gen_nop_modrm(env, s, modrm);
7892         break;
7893     case 0x11b:
7894         modrm = x86_ldub_code(env, s);
7895         if (s->flags & HF_MPX_EN_MASK) {
7896             mod = (modrm >> 6) & 3;
7897             reg = ((modrm >> 3) & 7) | rex_r;
7898             if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7899                 /* bndmk */
7900                 if (reg >= 4
7901                     || (prefixes & PREFIX_LOCK)
7902                     || s->aflag == MO_16) {
7903                     goto illegal_op;
7904                 }
7905                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7906                 if (a.base >= 0) {
7907                     tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
7908                     if (!CODE64(s)) {
7909                         tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
7910                     }
7911                 } else if (a.base == -1) {
7912                     /* no base register means the lower bound is 0 */
7913                     tcg_gen_movi_i64(cpu_bndl[reg], 0);
7914                 } else {
7915                     /* rip-relative generates #ud */
7916                     goto illegal_op;
7917                 }
7918                 tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
7919                 if (!CODE64(s)) {
7920                     tcg_gen_ext32u_tl(s->A0, s->A0);
7921                 }
7922                 tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
7923                 /* bnd registers are now in-use */
7924                 gen_set_hflag(s, HF_MPX_IU_MASK);
7925                 break;
7926             } else if (prefixes & PREFIX_REPNZ) {
7927                 /* bndcn */
7928                 if (reg >= 4
7929                     || (prefixes & PREFIX_LOCK)
7930                     || s->aflag == MO_16) {
7931                     goto illegal_op;
7932                 }
7933                 gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
7934             } else if (prefixes & PREFIX_DATA) {
7935                 /* bndmov -- to reg/mem */
7936                 if (reg >= 4 || s->aflag == MO_16) {
7937                     goto illegal_op;
7938                 }
7939                 if (mod == 3) {
7940                     int reg2 = (modrm & 7) | REX_B(s);
7941                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7942                         goto illegal_op;
7943                     }
7944                     if (s->flags & HF_MPX_IU_MASK) {
7945                         tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
7946                         tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
7947                     }
7948                 } else {
7949                     gen_lea_modrm(env, s, modrm);
7950                     if (CODE64(s)) {
7951                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
7952                                             s->mem_index, MO_LEQ);
7953                         tcg_gen_addi_tl(s->A0, s->A0, 8);
7954                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
7955                                             s->mem_index, MO_LEQ);
7956                     } else {
7957                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
7958                                             s->mem_index, MO_LEUL);
7959                         tcg_gen_addi_tl(s->A0, s->A0, 4);
7960                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
7961                                             s->mem_index, MO_LEUL);
7962                     }
7963                 }
7964             } else if (mod != 3) {
7965                 /* bndstx */
7966                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7967                 if (reg >= 4
7968                     || (prefixes & PREFIX_LOCK)
7969                     || s->aflag == MO_16
7970                     || a.base < -1) {
7971                     goto illegal_op;
7972                 }
7973                 if (a.base >= 0) {
7974                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7975                 } else {
7976                     tcg_gen_movi_tl(s->A0, 0);
7977                 }
7978                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7979                 if (a.index >= 0) {
7980                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7981                 } else {
7982                     tcg_gen_movi_tl(s->T0, 0);
7983                 }
7984                 if (CODE64(s)) {
7985                     gen_helper_bndstx64(cpu_env, s->A0, s->T0,
7986                                         cpu_bndl[reg], cpu_bndu[reg]);
7987                 } else {
7988                     gen_helper_bndstx32(cpu_env, s->A0, s->T0,
7989                                         cpu_bndl[reg], cpu_bndu[reg]);
7990                 }
7991             }
7992         }
7993         gen_nop_modrm(env, s, modrm);
7994         break;
7995     case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
7996         modrm = x86_ldub_code(env, s);
7997         gen_nop_modrm(env, s, modrm);
7998         break;
7999     case 0x120: /* mov reg, crN */
8000     case 0x122: /* mov crN, reg */
8001         if (s->cpl != 0) {
8002             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8003         } else {
8004             modrm = x86_ldub_code(env, s);
8005             /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8006              * AMD documentation (24594.pdf) and testing of
8007              * intel 386 and 486 processors all show that the mod bits
8008              * are assumed to be 1's, regardless of actual values.
8009              */
8010             rm = (modrm & 7) | REX_B(s);
8011             reg = ((modrm >> 3) & 7) | rex_r;
8012             if (CODE64(s))
8013                 ot = MO_64;
8014             else
8015                 ot = MO_32;
8016             if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
8017                 (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
8018                 reg = 8;
8019             }
8020             switch(reg) {
8021             case 0:
8022             case 2:
8023             case 3:
8024             case 4:
8025             case 8:
8026                 gen_update_cc_op(s);
8027                 gen_jmp_im(s, pc_start - s->cs_base);
8028                 if (b & 2) {
8029                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8030                         gen_io_start();
8031                     }
8032                     gen_op_mov_v_reg(s, ot, s->T0, rm);
8033                     gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
8034                                          s->T0);
8035                     gen_jmp_im(s, s->pc - s->cs_base);
8036                     gen_eob(s);
8037                 } else {
8038                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8039                         gen_io_start();
8040                     }
8041                     gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg));
8042                     gen_op_mov_reg_v(s, ot, rm, s->T0);
8043                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8044                         gen_jmp(s, s->pc - s->cs_base);
8045                     }
8046                 }
8047                 break;
8048             default:
8049                 goto unknown_op;
8050             }
8051         }
8052         break;
8053     case 0x121: /* mov reg, drN */
8054     case 0x123: /* mov drN, reg */
8055         if (s->cpl != 0) {
8056             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8057         } else {
8058             modrm = x86_ldub_code(env, s);
8059             /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8060              * AMD documentation (24594.pdf) and testing of
8061              * intel 386 and 486 processors all show that the mod bits
8062              * are assumed to be 1's, regardless of actual values.
8063              */
8064             rm = (modrm & 7) | REX_B(s);
8065             reg = ((modrm >> 3) & 7) | rex_r;
8066             if (CODE64(s))
8067                 ot = MO_64;
8068             else
8069                 ot = MO_32;
8070             if (reg >= 8) {
8071                 goto illegal_op;
8072             }
8073             if (b & 2) {
8074                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
8075                 gen_op_mov_v_reg(s, ot, s->T0, rm);
8076                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8077                 gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
8078                 gen_jmp_im(s, s->pc - s->cs_base);
8079                 gen_eob(s);
8080             } else {
8081                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
8082                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8083                 gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
8084                 gen_op_mov_reg_v(s, ot, rm, s->T0);
8085             }
8086         }
8087         break;
8088     case 0x106: /* clts */
8089         if (s->cpl != 0) {
8090             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8091         } else {
8092             gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
8093             gen_helper_clts(cpu_env);
8094             /* abort block because static cpu state changed */
8095             gen_jmp_im(s, s->pc - s->cs_base);
8096             gen_eob(s);
8097         }
8098         break;
8099     /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8100     case 0x1c3: /* MOVNTI reg, mem */
8101         if (!(s->cpuid_features & CPUID_SSE2))
8102             goto illegal_op;
8103         ot = mo_64_32(dflag);
8104         modrm = x86_ldub_code(env, s);
8105         mod = (modrm >> 6) & 3;
8106         if (mod == 3)
8107             goto illegal_op;
8108         reg = ((modrm >> 3) & 7) | rex_r;
8109         /* generate a generic store */
8110         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8111         break;
8112     case 0x1ae:
8113         modrm = x86_ldub_code(env, s);
8114         switch (modrm) {
8115         CASE_MODRM_MEM_OP(0): /* fxsave */
8116             if (!(s->cpuid_features & CPUID_FXSR)
8117                 || (prefixes & PREFIX_LOCK)) {
8118                 goto illegal_op;
8119             }
8120             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8121                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8122                 break;
8123             }
8124             gen_lea_modrm(env, s, modrm);
8125             gen_helper_fxsave(cpu_env, s->A0);
8126             break;
8127 
8128         CASE_MODRM_MEM_OP(1): /* fxrstor */
8129             if (!(s->cpuid_features & CPUID_FXSR)
8130                 || (prefixes & PREFIX_LOCK)) {
8131                 goto illegal_op;
8132             }
8133             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8134                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8135                 break;
8136             }
8137             gen_lea_modrm(env, s, modrm);
8138             gen_helper_fxrstor(cpu_env, s->A0);
8139             break;
8140 
8141         CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8142             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8143                 goto illegal_op;
8144             }
8145             if (s->flags & HF_TS_MASK) {
8146                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8147                 break;
8148             }
8149             gen_lea_modrm(env, s, modrm);
8150             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8151             gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8152             break;
8153 
8154         CASE_MODRM_MEM_OP(3): /* stmxcsr */
8155             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8156                 goto illegal_op;
8157             }
8158             if (s->flags & HF_TS_MASK) {
8159                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8160                 break;
8161             }
8162             gen_helper_update_mxcsr(cpu_env);
8163             gen_lea_modrm(env, s, modrm);
8164             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8165             gen_op_st_v(s, MO_32, s->T0, s->A0);
8166             break;
8167 
8168         CASE_MODRM_MEM_OP(4): /* xsave */
8169             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8170                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8171                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8172                 goto illegal_op;
8173             }
8174             gen_lea_modrm(env, s, modrm);
8175             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8176                                   cpu_regs[R_EDX]);
8177             gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8178             break;
8179 
8180         CASE_MODRM_MEM_OP(5): /* xrstor */
8181             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8182                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8183                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8184                 goto illegal_op;
8185             }
8186             gen_lea_modrm(env, s, modrm);
8187             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8188                                   cpu_regs[R_EDX]);
8189             gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8190             /* XRSTOR is how MPX is enabled, which changes how
8191                we translate.  Thus we need to end the TB.  */
8192             gen_update_cc_op(s);
8193             gen_jmp_im(s, s->pc - s->cs_base);
8194             gen_eob(s);
8195             break;
8196 
8197         CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8198             if (prefixes & PREFIX_LOCK) {
8199                 goto illegal_op;
8200             }
8201             if (prefixes & PREFIX_DATA) {
8202                 /* clwb */
8203                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8204                     goto illegal_op;
8205                 }
8206                 gen_nop_modrm(env, s, modrm);
8207             } else {
8208                 /* xsaveopt */
8209                 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8210                     || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8211                     || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8212                     goto illegal_op;
8213                 }
8214                 gen_lea_modrm(env, s, modrm);
8215                 tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8216                                       cpu_regs[R_EDX]);
8217                 gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8218             }
8219             break;
8220 
8221         CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8222             if (prefixes & PREFIX_LOCK) {
8223                 goto illegal_op;
8224             }
8225             if (prefixes & PREFIX_DATA) {
8226                 /* clflushopt */
8227                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8228                     goto illegal_op;
8229                 }
8230             } else {
8231                 /* clflush */
8232                 if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8233                     || !(s->cpuid_features & CPUID_CLFLUSH)) {
8234                     goto illegal_op;
8235                 }
8236             }
8237             gen_nop_modrm(env, s, modrm);
8238             break;
8239 
8240         case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8241         case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8242         case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8243         case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8244             if (CODE64(s)
8245                 && (prefixes & PREFIX_REPZ)
8246                 && !(prefixes & PREFIX_LOCK)
8247                 && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8248                 TCGv base, treg, src, dst;
8249 
8250                 /* Preserve hflags bits by testing CR4 at runtime.  */
8251                 tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8252                 gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
8253 
8254                 base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8255                 treg = cpu_regs[(modrm & 7) | REX_B(s)];
8256 
8257                 if (modrm & 0x10) {
8258                     /* wr*base */
8259                     dst = base, src = treg;
8260                 } else {
8261                     /* rd*base */
8262                     dst = treg, src = base;
8263                 }
8264 
8265                 if (s->dflag == MO_32) {
8266                     tcg_gen_ext32u_tl(dst, src);
8267                 } else {
8268                     tcg_gen_mov_tl(dst, src);
8269                 }
8270                 break;
8271             }
8272             goto unknown_op;
8273 
8274         case 0xf8: /* sfence / pcommit */
8275             if (prefixes & PREFIX_DATA) {
8276                 /* pcommit */
8277                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8278                     || (prefixes & PREFIX_LOCK)) {
8279                     goto illegal_op;
8280                 }
8281                 break;
8282             }
8283             /* fallthru */
8284         case 0xf9 ... 0xff: /* sfence */
8285             if (!(s->cpuid_features & CPUID_SSE)
8286                 || (prefixes & PREFIX_LOCK)) {
8287                 goto illegal_op;
8288             }
8289             tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8290             break;
8291         case 0xe8 ... 0xef: /* lfence */
8292             if (!(s->cpuid_features & CPUID_SSE)
8293                 || (prefixes & PREFIX_LOCK)) {
8294                 goto illegal_op;
8295             }
8296             tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8297             break;
8298         case 0xf0 ... 0xf7: /* mfence */
8299             if (!(s->cpuid_features & CPUID_SSE2)
8300                 || (prefixes & PREFIX_LOCK)) {
8301                 goto illegal_op;
8302             }
8303             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8304             break;
8305 
8306         default:
8307             goto unknown_op;
8308         }
8309         break;
8310 
8311     case 0x10d: /* 3DNow! prefetch(w) */
8312         modrm = x86_ldub_code(env, s);
8313         mod = (modrm >> 6) & 3;
8314         if (mod == 3)
8315             goto illegal_op;
8316         gen_nop_modrm(env, s, modrm);
8317         break;
8318     case 0x1aa: /* rsm */
8319         gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
8320         if (!(s->flags & HF_SMM_MASK))
8321             goto illegal_op;
8322         gen_update_cc_op(s);
8323         gen_jmp_im(s, s->pc - s->cs_base);
8324         gen_helper_rsm(cpu_env);
8325         gen_eob(s);
8326         break;
8327     case 0x1b8: /* SSE4.2 popcnt */
8328         if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8329              PREFIX_REPZ)
8330             goto illegal_op;
8331         if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8332             goto illegal_op;
8333 
8334         modrm = x86_ldub_code(env, s);
8335         reg = ((modrm >> 3) & 7) | rex_r;
8336 
8337         if (s->prefix & PREFIX_DATA) {
8338             ot = MO_16;
8339         } else {
8340             ot = mo_64_32(dflag);
8341         }
8342 
8343         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8344         gen_extu(ot, s->T0);
8345         tcg_gen_mov_tl(cpu_cc_src, s->T0);
8346         tcg_gen_ctpop_tl(s->T0, s->T0);
8347         gen_op_mov_reg_v(s, ot, reg, s->T0);
8348 
8349         set_cc_op(s, CC_OP_POPCNT);
8350         break;
8351     case 0x10e ... 0x10f:
8352         /* 3DNow! instructions, ignore prefixes */
8353         s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8354         /* fall through */
8355     case 0x110 ... 0x117:
8356     case 0x128 ... 0x12f:
8357     case 0x138 ... 0x13a:
8358     case 0x150 ... 0x179:
8359     case 0x17c ... 0x17f:
8360     case 0x1c2:
8361     case 0x1c4 ... 0x1c6:
8362     case 0x1d0 ... 0x1fe:
8363         gen_sse(env, s, b, pc_start, rex_r);
8364         break;
8365     default:
8366         goto unknown_op;
8367     }
8368     return s->pc;
8369  illegal_op:
8370     gen_illegal_opcode(s);
8371     return s->pc;
8372  unknown_op:
8373     gen_unknown_opcode(env, s);
8374     return s->pc;
8375 }
8376 
8377 void tcg_x86_init(void)
8378 {
8379     static const char reg_names[CPU_NB_REGS][4] = {
8380 #ifdef TARGET_X86_64
8381         [R_EAX] = "rax",
8382         [R_EBX] = "rbx",
8383         [R_ECX] = "rcx",
8384         [R_EDX] = "rdx",
8385         [R_ESI] = "rsi",
8386         [R_EDI] = "rdi",
8387         [R_EBP] = "rbp",
8388         [R_ESP] = "rsp",
8389         [8]  = "r8",
8390         [9]  = "r9",
8391         [10] = "r10",
8392         [11] = "r11",
8393         [12] = "r12",
8394         [13] = "r13",
8395         [14] = "r14",
8396         [15] = "r15",
8397 #else
8398         [R_EAX] = "eax",
8399         [R_EBX] = "ebx",
8400         [R_ECX] = "ecx",
8401         [R_EDX] = "edx",
8402         [R_ESI] = "esi",
8403         [R_EDI] = "edi",
8404         [R_EBP] = "ebp",
8405         [R_ESP] = "esp",
8406 #endif
8407     };
8408     static const char seg_base_names[6][8] = {
8409         [R_CS] = "cs_base",
8410         [R_DS] = "ds_base",
8411         [R_ES] = "es_base",
8412         [R_FS] = "fs_base",
8413         [R_GS] = "gs_base",
8414         [R_SS] = "ss_base",
8415     };
8416     static const char bnd_regl_names[4][8] = {
8417         "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8418     };
8419     static const char bnd_regu_names[4][8] = {
8420         "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8421     };
8422     int i;
8423 
8424     cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8425                                        offsetof(CPUX86State, cc_op), "cc_op");
8426     cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8427                                     "cc_dst");
8428     cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8429                                     "cc_src");
8430     cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8431                                      "cc_src2");
8432 
8433     for (i = 0; i < CPU_NB_REGS; ++i) {
8434         cpu_regs[i] = tcg_global_mem_new(cpu_env,
8435                                          offsetof(CPUX86State, regs[i]),
8436                                          reg_names[i]);
8437     }
8438 
8439     for (i = 0; i < 6; ++i) {
8440         cpu_seg_base[i]
8441             = tcg_global_mem_new(cpu_env,
8442                                  offsetof(CPUX86State, segs[i].base),
8443                                  seg_base_names[i]);
8444     }
8445 
8446     for (i = 0; i < 4; ++i) {
8447         cpu_bndl[i]
8448             = tcg_global_mem_new_i64(cpu_env,
8449                                      offsetof(CPUX86State, bnd_regs[i].lb),
8450                                      bnd_regl_names[i]);
8451         cpu_bndu[i]
8452             = tcg_global_mem_new_i64(cpu_env,
8453                                      offsetof(CPUX86State, bnd_regs[i].ub),
8454                                      bnd_regu_names[i]);
8455     }
8456 }
8457 
8458 static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
8459 {
8460     DisasContext *dc = container_of(dcbase, DisasContext, base);
8461     CPUX86State *env = cpu->env_ptr;
8462     uint32_t flags = dc->base.tb->flags;
8463     target_ulong cs_base = dc->base.tb->cs_base;
8464 
8465     dc->pe = (flags >> HF_PE_SHIFT) & 1;
8466     dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
8467     dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
8468     dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
8469     dc->f_st = 0;
8470     dc->vm86 = (flags >> VM_SHIFT) & 1;
8471     dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
8472     dc->iopl = (flags >> IOPL_SHIFT) & 3;
8473     dc->tf = (flags >> TF_SHIFT) & 1;
8474     dc->cc_op = CC_OP_DYNAMIC;
8475     dc->cc_op_dirty = false;
8476     dc->cs_base = cs_base;
8477     dc->popl_esp_hack = 0;
8478     /* select memory access functions */
8479     dc->mem_index = 0;
8480 #ifdef CONFIG_SOFTMMU
8481     dc->mem_index = cpu_mmu_index(env, false);
8482 #endif
8483     dc->cpuid_features = env->features[FEAT_1_EDX];
8484     dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8485     dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8486     dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8487     dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8488     dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8489 #ifdef TARGET_X86_64
8490     dc->lma = (flags >> HF_LMA_SHIFT) & 1;
8491     dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
8492 #endif
8493     dc->flags = flags;
8494     dc->jmp_opt = !(dc->tf || dc->base.singlestep_enabled ||
8495                     (flags & HF_INHIBIT_IRQ_MASK));
8496     /* Do not optimize repz jumps at all in icount mode, because
8497        rep movsS instructions are execured with different paths
8498        in !repz_opt and repz_opt modes. The first one was used
8499        always except single step mode. And this setting
8500        disables jumps optimization and control paths become
8501        equivalent in run and single step modes.
8502        Now there will be no jump optimization for repz in
8503        record/replay modes and there will always be an
8504        additional step for ecx=0 when icount is enabled.
8505      */
8506     dc->repz_opt = !dc->jmp_opt && !(tb_cflags(dc->base.tb) & CF_USE_ICOUNT);
8507 #if 0
8508     /* check addseg logic */
8509     if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
8510         printf("ERROR addseg\n");
8511 #endif
8512 
8513     dc->T0 = tcg_temp_new();
8514     dc->T1 = tcg_temp_new();
8515     dc->A0 = tcg_temp_new();
8516 
8517     dc->tmp0 = tcg_temp_new();
8518     dc->tmp1_i64 = tcg_temp_new_i64();
8519     dc->tmp2_i32 = tcg_temp_new_i32();
8520     dc->tmp3_i32 = tcg_temp_new_i32();
8521     dc->tmp4 = tcg_temp_new();
8522     dc->ptr0 = tcg_temp_new_ptr();
8523     dc->ptr1 = tcg_temp_new_ptr();
8524     dc->cc_srcT = tcg_temp_local_new();
8525 }
8526 
/* TranslatorOps hook: nothing to emit at the start of a TB on i386.  */
static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}
8530 
/*
 * TranslatorOps hook called before each instruction: record the guest
 * PC and the current cc_op so restore_state_to_opc() can rebuild both
 * when an exception unwinds into the middle of this TB.
 */
static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
}
8537 
8538 static bool i386_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
8539                                      const CPUBreakpoint *bp)
8540 {
8541     DisasContext *dc = container_of(dcbase, DisasContext, base);
8542     /* If RF is set, suppress an internally generated breakpoint.  */
8543     int flags = dc->base.tb->flags & HF_RF_MASK ? BP_GDB : BP_ANY;
8544     if (bp->flags & flags) {
8545         gen_debug(dc, dc->base.pc_next - dc->cs_base);
8546         dc->base.is_jmp = DISAS_NORETURN;
8547         /* The address covered by the breakpoint must be included in
8548            [tb->pc, tb->pc + tb->size) in order to for it to be
8549            properly cleared -- thus we increment the PC here so that
8550            the generic logic setting tb->size later does the right thing.  */
8551         dc->base.pc_next += 1;
8552         return true;
8553     } else {
8554         return false;
8555     }
8556 }
8557 
8558 static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8559 {
8560     DisasContext *dc = container_of(dcbase, DisasContext, base);
8561     target_ulong pc_next;
8562 
8563 #ifdef TARGET_VSYSCALL_PAGE
8564     /*
8565      * Detect entry into the vsyscall page and invoke the syscall.
8566      */
8567     if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) {
8568         gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next);
8569         return;
8570     }
8571 #endif
8572 
8573     pc_next = disas_insn(dc, cpu);
8574 
8575     if (dc->tf || (dc->base.tb->flags & HF_INHIBIT_IRQ_MASK)) {
8576         /* if single step mode, we generate only one instruction and
8577            generate an exception */
8578         /* if irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear
8579            the flag and abort the translation to give the irqs a
8580            chance to happen */
8581         dc->base.is_jmp = DISAS_TOO_MANY;
8582     } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
8583                && ((pc_next & TARGET_PAGE_MASK)
8584                    != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
8585                        & TARGET_PAGE_MASK)
8586                    || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
8587         /* Do not cross the boundary of the pages in icount mode,
8588            it can cause an exception. Do it only when boundary is
8589            crossed by the first instruction in the block.
8590            If current instruction already crossed the bound - it's ok,
8591            because an exception hasn't stopped this code.
8592          */
8593         dc->base.is_jmp = DISAS_TOO_MANY;
8594     } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
8595         dc->base.is_jmp = DISAS_TOO_MANY;
8596     }
8597 
8598     dc->base.pc_next = pc_next;
8599 }
8600 
8601 static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8602 {
8603     DisasContext *dc = container_of(dcbase, DisasContext, base);
8604 
8605     if (dc->base.is_jmp == DISAS_TOO_MANY) {
8606         gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
8607         gen_eob(dc);
8608     }
8609 }
8610 
/*
 * TranslatorOps hook: log the guest instructions covered by the TB
 * that was just translated.
 */
static void i386_tr_disas_log(const DisasContextBase *dcbase,
                              CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
}
8619 
/* Hooks wiring the i386 front end into the generic translator loop.  */
static const TranslatorOps i386_tr_ops = {
    .init_disas_context = i386_tr_init_disas_context,
    .tb_start           = i386_tr_tb_start,
    .insn_start         = i386_tr_insn_start,
    .breakpoint_check   = i386_tr_breakpoint_check,
    .translate_insn     = i386_tr_translate_insn,
    .tb_stop            = i386_tr_tb_stop,
    .disas_log          = i386_tr_disas_log,
};
8629 
/* generate intermediate code for basic block 'tb'.  */
void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
{
    DisasContext dc;

    /* Drive the generic translator loop with the i386 hooks above.  */
    translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
}
8637 
8638 void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8639                           target_ulong *data)
8640 {
8641     int cc_op = data[1];
8642     env->eip = data[0] - tb->cs_base;
8643     if (cc_op != CC_OP_DYNAMIC) {
8644         env->cc_op = cc_op;
8645     }
8646 }
8647