/* xref: /qemu/target/i386/tcg/translate.c (revision f917eed3) */
/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "exec/cpu_ldst.h"
#include "exec/translator.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "helper-tcg.h"

#include "trace-tcg.h"
#include "exec/log.h"

#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20

#ifdef TARGET_X86_64
#define CODE64(s) ((s)->code64)
#define REX_X(s) ((s)->rex_x)
#define REX_B(s) ((s)->rex_b)
#else
#define CODE64(s) 0
#define REX_X(s) 0
#define REX_B(s) 0
#endif

#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7

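/* Example: the modrm byte 0x65 is (mod=1 << 6) | (op=4 << 3) | (rm=5),
   so it is matched by CASE_MODRM_MEM_OP(4); mod=1 selects a memory
   operand with an 8-bit displacement.  */
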
//#define MACRO_TEST   1

/* global register indexes */
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
static TCGv cpu_seg_base[6];
static TCGv_i64 cpu_bndl[4];
static TCGv_i64 cpu_bndu[4];

#include "exec/gen-icount.h"

typedef struct DisasContext {
    DisasContextBase base;

    /* current insn context */
    int override; /* -1 if no override */
    int prefix;
    MemOp aflag;
    MemOp dflag;
    target_ulong pc_start;
    target_ulong pc; /* pc = eip + cs_base */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b;
#endif
    int vex_l;  /* vex vector length */
    int vex_v;  /* vex vvvv register, without 1's complement.  */
    int ss32;   /* 32 bit stack segment */
    CCOp cc_op;  /* current CC operation */
    bool cc_op_dirty;
#ifdef TARGET_X86_64
    bool x86_64_hregs;
#endif
    int addseg; /* non-zero if any of DS/ES/SS has a non-zero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;
    int iopl;
    int tf;     /* TF cpu flag */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int repz_opt; /* optimize jumps within repz instructions */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;

    /* TCG local temps */
    TCGv cc_srcT;
    TCGv A0;
    TCGv T0;
    TCGv T1;

    /* TCG local register indexes (only used inside old micro ops) */
    TCGv tmp0;
    TCGv tmp4;
    TCGv_ptr ptr0;
    TCGv_ptr ptr1;
    TCGv_i32 tmp2_i32;
    TCGv_i32 tmp3_i32;
    TCGv_i64 tmp1_i64;

    sigjmp_buf jmpbuf;
} DisasContext;

static void gen_eob(DisasContext *s);
static void gen_jr(DisasContext *s, TCGv dest);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, MemOp ot, int d);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};

/* Bit set if the global variable is live after setting CC_OP to X.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
    [CC_OP_POPCNT] = USES_CC_SRC,
};

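/* Switch the lazy condition-code state to OP, discarding any CC
   globals that the new state no longer uses (see cc_op_live).  */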
static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(s->cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}

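/* Flush a computed CC_OP value out to the cpu_cc_op global, so that
   helper calls and exception paths see the current lazy-flags state.  */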
static void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op_dirty) {
        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
        s->cc_op_dirty = false;
    }
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

/* In instruction encodings for byte register accesses, the
 * register number usually indicates "low 8 bits of register N";
 * however, there are some special cases where N in 4..7 indicates
 * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4".  Return
 * true for this special case, false otherwise.
 */
static inline bool byte_reg_is_xH(DisasContext *s, int reg)
{
    if (reg < 4) {
        return false;
    }
#ifdef TARGET_X86_64
    if (reg >= 8 || s->x86_64_hregs) {
        return false;
    }
#endif
    return true;
}

/* Select the size of a push/pop operation.  */
static inline MemOp mo_pushpop(DisasContext *s, MemOp ot)
{
    if (CODE64(s)) {
        return ot == MO_16 ? MO_16 : MO_64;
    } else {
        return ot;
    }
}

/* Select the size of the stack pointer.  */
static inline MemOp mo_stacksize(DisasContext *s)
{
    return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
}

/* Select only size 64 else 32.  Used for SSE operand sizes.  */
static inline MemOp mo_64_32(MemOp ot)
{
#ifdef TARGET_X86_64
    return ot == MO_64 ? MO_64 : MO_32;
#else
    return MO_32;
#endif
}

/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
   byte vs word opcodes.  */
static inline MemOp mo_b_d(int b, MemOp ot)
{
    return b & 1 ? ot : MO_8;
}

/* Select size 8 if lsb of B is clear, else OT capped at 32.
   Used for decoding operand size of port opcodes.  */
static inline MemOp mo_b_d32(int b, MemOp ot)
{
    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
}

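/* Write T0 into general register REG with operand size OT.  Byte
   writes go to the low byte, or to bits 15..8 for AH/CH/DH/BH; 16-bit
   writes only touch the low 16 bits; 32-bit writes zero-extend,
   clearing the high half on x86-64.  */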
static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0)
{
    switch (ot) {
    case MO_8:
        if (!byte_reg_is_xH(s, reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the high half of the register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}

static inline
void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg)
{
    if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
        tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
    } else {
        tcg_gen_mov_tl(t0, cpu_regs[reg]);
    }
}

static void gen_add_A0_im(DisasContext *s, int val)
{
    tcg_gen_addi_tl(s->A0, s->A0, val);
    if (!CODE64(s)) {
        tcg_gen_ext32u_tl(s->A0, s->A0);
    }
}

static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}

static inline
void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg)
{
    tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
{
    if (d == OR_TMP0) {
        gen_op_st_v(s, idx, s->T0, s->A0);
    } else {
        gen_op_mov_reg_v(s, idx, d, s->T0);
    }
}

static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
{
    tcg_gen_movi_tl(s->tmp0, pc);
    gen_op_jmp_v(s->tmp0);
}

/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(s->A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && s->addseg) {
            ovr_seg = def_seg;
        }
        if (ovr_seg < 0) {
            tcg_gen_ext32u_tl(s->A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(s->A0, a0);
        a0 = s->A0;
        if (ovr_seg < 0) {
            if (s->addseg) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(s->A0, a0, seg);
        } else if (CODE64(s)) {
            tcg_gen_ext32u_tl(s->A0, a0);
            tcg_gen_add_tl(s->A0, s->A0, seg);
        } else {
            tcg_gen_add_tl(s->A0, a0, seg);
            tcg_gen_ext32u_tl(s->A0, s->A0);
        }
    }
}

static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}

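/* Load the string-op increment into T0: DF is kept in env as +1/-1,
   so shifting it left by OT yields +/-(1 << OT).  */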
static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)
{
    tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
    tcg_gen_shli_tl(s->T0, s->T0, ot);
}

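/* Sign- or zero-extend SRC to the target-long width.  Returns DST if
   an extension was emitted, or SRC unchanged when the size already
   matches the target long.  */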
static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        return src;
    }
}

static void gen_extu(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

static void gen_exts(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}

static inline
void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
}

static inline
void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
}

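/* Dispatch to the size-specific I/O input helper;
   gen_helper_out_func below is the output counterpart.  */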
static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        tcg_abort();
    }
}

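/* Check I/O permission for an access of width OT: consult the TSS
   I/O bitmap via the check_io helpers when CPL or VM86 requires it,
   and raise the SVM IOIO intercept when running as an SVM guest.  */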
static void gen_check_io(DisasContext *s, MemOp ot, target_ulong cur_eip,
                         uint32_t svm_flags)
{
    target_ulong next_eip;

    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        switch (ot) {
        case MO_8:
            gen_helper_check_iob(cpu_env, s->tmp2_i32);
            break;
        case MO_16:
            gen_helper_check_iow(cpu_env, s->tmp2_i32);
            break;
        case MO_32:
            gen_helper_check_iol(cpu_env, s->tmp2_i32);
            break;
        default:
            tcg_abort();
        }
    }
    if (s->flags & HF_GUEST_MASK) {
        gen_update_cc_op(s);
        gen_jmp_im(s, cur_eip);
        svm_flags |= (1 << (4 + ot));
        next_eip = s->pc - s->cs_base;
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        gen_helper_svm_check_io(cpu_env, s->tmp2_i32,
                                tcg_const_i32(svm_flags),
                                tcg_const_i32(next_eip - cur_eip));
    }
}

static inline void gen_movs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_op_update1_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update2_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update3_cc(DisasContext *s, TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
{
    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
}

static void gen_op_update_neg_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
    tcg_gen_neg_tl(cpu_cc_src, s->T0);
    tcg_gen_movi_tl(s->cc_srcT, 0);
}

/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    zero = NULL;
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}

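/* Describes how to evaluate a condition without materializing it:
   test REG (optionally masked with MASK) against REG2 or the
   immediate IMM using COND.  When no_setcond is set, REG already
   holds the 0/1 flag value.  */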
typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;
    bool use_reg2;
    bool no_setcond;
} CCPrepare;

/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
        tcg_gen_mov_tl(t0, s->cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}

/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}

/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}

/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}

/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
                             .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}

/* Prepare the comparison that tests jump opcode condition 'b', for a
   later setcond or brcond.  In the fast case, T0 is guaranteed not to
   be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    MemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_extu(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_exts(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}

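/* Emit code that stores the 0/1 value of condition B into REG.  */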
static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}

static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* XXX: does not work with gdbstub "ice" single step; not a
   serious problem */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s, s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

static inline void gen_stos(DisasContext *s, MemOp ot)
{
    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

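/* If I/O breakpoints are enabled, call the helper that matches this
   port access against the debug registers.  */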
static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
    }
}

static inline void gen_ins(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of a page fault. */
    tcg_gen_movi_tl(s->T0, 0);
    gen_op_st_v(s, ot, s->T0, s->A0);
    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    gen_helper_in_func(ot, s->T0, s->tmp2_i32);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

static inline void gen_outs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);

    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
    gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

/* Same method as Valgrind: we generate jumps to the current or the
   next instruction.  */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    /* a loop would cause two single-step exceptions if ECX == 1              \
       before the rep string instruction */                                   \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)

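/* Dispatch an x87 arithmetic operation on ST0 and FT0; OP is the /r
   field of the opcode (fadd, fmul, fcom, fcomp, fsub, fsubr, fdiv,
   fdivr).  fcom and fcomp share the compare helper here; the pop for
   fcomp is emitted by the caller.  */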
static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0(cpu_env);
        break;
    case 1:
        gen_helper_fmul_ST0_FT0(cpu_env);
        break;
    case 2:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 3:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 4:
        gen_helper_fsub_ST0_FT0(cpu_env);
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0(cpu_env);
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0(cpu_env);
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0(cpu_env);
        break;
    }
}

/* NOTE the exception in "r" op ordering */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}

static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
    s->base.is_jmp = DISAS_NORETURN;
}

/* Generate #UD for the current instruction.  The assumption here is that
   the instruction is known, but it isn't allowed in the current cpu mode.  */
static void gen_illegal_opcode(DisasContext *s)
{
    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
{
    if (d != OR_TMP0) {
        if (s1->prefix & PREFIX_LOCK) {
            /* Lock prefix when destination is not memory.  */
            gen_illegal_opcode(s1);
            return;
        }
        gen_op_mov_v_reg(s1, ot, s1->T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
    }
    switch (op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
            tcg_gen_neg_tl(s1->T0, s1->T0);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_neg_tl(s1->T0, s1->T1);
            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
        } else {
            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
{
    if (s1->prefix & PREFIX_LOCK) {
        if (d != OR_TMP0) {
            /* Lock prefix when destination is not memory */
            gen_illegal_opcode(s1);
            return;
        }
        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                    s1->mem_index, ot | MO_LE);
    } else {
        if (d != OR_TMP0) {
            gen_op_mov_v_reg(s1, ot, s1->T0, d);
        } else {
            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
        }
        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
        gen_op_st_rm_T0_A0(s1, ot, d);
    }

    gen_compute_eflags_c(s1, cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
}

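/* Update the flags state after a variable-count shift.  If the count
   turns out to be zero at runtime, the architectural flags must stay
   unchanged, so new values are committed with movcond on COUNT != 0
   (or unconditionally when the old value is known to be dead).  */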
static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we must
       take care not to disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
        oldop = s->tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

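/* Shift the register or memory operand OP1 by the count in T1:
   left, logical right, or arithmetic right.  */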
static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    tcg_gen_andi_tl(s->T1, s->T1, mask);
    tcg_gen_subi_tl(s->tmp0, s->T1, 1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, s->T0);
            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_sar_tl(s->T0, s->T0, s->T1);
        } else {
            gen_extu(ot, s->T0);
            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_shr_tl(s->T0, s->T0, s->T1);
        }
    } else {
        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
        tcg_gen_shl_tl(s->T0, s->T0, s->T1);
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
}

static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, s->T0);
                tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_sari_tl(s->T0, s->T0, op2);
            } else {
                gen_extu(ot, s->T0);
                tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_shri_tl(s->T0, s->T0, op2);
            }
        } else {
            tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
            tcg_gen_shli_tl(s->T0, s->T0, op2);
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* update eflags if non-zero shift */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, s->T0);
        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    }
}

static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
    TCGv_i32 t0, t1;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    tcg_gen_andi_tl(s->T1, s->T1, mask);

    switch (ot) {
    case MO_8:
        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
        tcg_gen_ext8u_tl(s->T0, s->T0);
        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
        goto do_long;
    case MO_16:
        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
        goto do_long;
    do_long:
#ifdef TARGET_X86_64
    case MO_32:
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
        if (is_right) {
            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        } else {
            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        }
        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
        break;
#endif
    default:
        if (is_right) {
            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
        } else {
            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
        }
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* We'll need the flags computed into CC_SRC.  */
    gen_compute_eflags(s);

    /* The value that was "rotated out" is now present at the other end
       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
       since we've computed the flags into CC_SRC, these variables are
       currently dead.  */
    if (is_right) {
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
    } else {
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
    }
    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);

    /* Now conditionally store the new CC_OP value.  If the shift count
       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
       Otherwise reuse CC_OP_ADCOX, which has the C and O flags split out
       exactly as we computed above.  */
    t0 = tcg_const_i32(0);
    t1 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(t1, s->T1);
    tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
    tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                        s->tmp2_i32, s->tmp3_i32);
    tcg_temp_free_i32(t0);
    tcg_temp_free_i32(t1);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

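/* Rotate the register or memory operand OP1 by the immediate count
   OP2; flags are only updated when the masked count is nonzero.  */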
static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
                          int is_right)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);
    int shift;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        switch (ot) {
#ifdef TARGET_X86_64
        case MO_32:
            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
            if (is_right) {
                tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
            } else {
                tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
            }
            tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
            break;
#endif
        default:
            if (is_right) {
                tcg_gen_rotri_tl(s->T0, s->T0, op2);
            } else {
                tcg_gen_rotli_tl(s->T0, s->T0, op2);
            }
            break;
        case MO_8:
            mask = 7;
            goto do_shifts;
        case MO_16:
            mask = 15;
        do_shifts:
            shift = op2 & mask;
            if (is_right) {
                shift = mask + 1 - shift;
            }
            gen_extu(ot, s->T0);
            tcg_gen_shli_tl(s->tmp0, s->T0, shift);
            tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
            tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
            break;
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    if (op2 != 0) {
        /* Compute the flags into CC_SRC.  */
        gen_compute_eflags(s);

        /* The value that was "rotated out" is now present at the other end
           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
           since we've computed the flags into CC_SRC, these variables are
           currently dead.  */
        if (is_right) {
            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
            tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
        } else {
            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
        }
        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
        set_cc_op(s, CC_OP_ADCOX);
    }
}

/* XXX: add faster immediate = 1 case */
static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
                           int is_right)
{
    gen_compute_eflags(s);
    assert(s->cc_op == CC_OP_EFLAGS);

    /* load */
1704     if (op1 == OR_TMP0)
1705         gen_op_ld_v(s, ot, s->T0, s->A0);
1706     else
1707         gen_op_mov_v_reg(s, ot, s->T0, op1);
1708 
1709     if (is_right) {
1710         switch (ot) {
1711         case MO_8:
1712             gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1713             break;
1714         case MO_16:
1715             gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1716             break;
1717         case MO_32:
1718             gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1719             break;
1720 #ifdef TARGET_X86_64
1721         case MO_64:
1722             gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1723             break;
1724 #endif
1725         default:
1726             tcg_abort();
1727         }
1728     } else {
1729         switch (ot) {
1730         case MO_8:
1731             gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1732             break;
1733         case MO_16:
1734             gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1735             break;
1736         case MO_32:
1737             gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1738             break;
1739 #ifdef TARGET_X86_64
1740         case MO_64:
1741             gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1742             break;
1743 #endif
1744         default:
1745             tcg_abort();
1746         }
1747     }
1748     /* store */
1749     gen_op_st_rm_T0_A0(s, ot, op1);
1750 }
1751 
1752 /* XXX: add faster immediate case */
1753 static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
1754                              bool is_right, TCGv count_in)
1755 {
1756     target_ulong mask = (ot == MO_64 ? 63 : 31);
1757     TCGv count;
1758 
1759     /* load */
1760     if (op1 == OR_TMP0) {
1761         gen_op_ld_v(s, ot, s->T0, s->A0);
1762     } else {
1763         gen_op_mov_v_reg(s, ot, s->T0, op1);
1764     }
1765 
1766     count = tcg_temp_new();
1767     tcg_gen_andi_tl(count, count_in, mask);
1768 
1769     switch (ot) {
1770     case MO_16:
1771         /* Note: we implement the Intel behaviour for shift count > 16.
1772            This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1773            portion by constructing it as a 32-bit value.  */
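        /* e.g. "shrdw $4, %bx, %ax" with AX=0x1234 and BX=0x5678 computes
           (BX:AX) >> 4 = 0x05678123 and leaves 0x8123 in AX; a count above
           16 pulls the old AX bits back in at the top.  */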
1774         if (is_right) {
1775             tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
1776             tcg_gen_mov_tl(s->T1, s->T0);
1777             tcg_gen_mov_tl(s->T0, s->tmp0);
1778         } else {
1779             tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
1780         }
1781         /* FALLTHRU */
1782 #ifdef TARGET_X86_64
1783     case MO_32:
1784         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1785         tcg_gen_subi_tl(s->tmp0, count, 1);
1786         if (is_right) {
1787             tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
1788             tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
1789             tcg_gen_shr_i64(s->T0, s->T0, count);
1790         } else {
1791             tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
1792             tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
1793             tcg_gen_shl_i64(s->T0, s->T0, count);
1794             tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
1795             tcg_gen_shri_i64(s->T0, s->T0, 32);
1796         }
1797         break;
1798 #endif
1799     default:
1800         tcg_gen_subi_tl(s->tmp0, count, 1);
1801         if (is_right) {
1802             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1803 
1804             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1805             tcg_gen_shr_tl(s->T0, s->T0, count);
1806             tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
1807         } else {
1808             tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1809             if (ot == MO_16) {
1810                 /* Only needed if count > 16, for Intel behaviour.  */
1811                 tcg_gen_subfi_tl(s->tmp4, 33, count);
1812                 tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
1813                 tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
1814             }
1815 
1816             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1817             tcg_gen_shl_tl(s->T0, s->T0, count);
1818             tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
1819         }
1820         tcg_gen_movi_tl(s->tmp4, 0);
1821         tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
1822                            s->tmp4, s->T1);
1823         tcg_gen_or_tl(s->T0, s->T0, s->T1);
1824         break;
1825     }
1826 
1827     /* store */
1828     gen_op_st_rm_T0_A0(s, ot, op1);
1829 
1830     gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
1831     tcg_temp_free(count);
1832 }
1833 
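/* Dispatch a group-2 shift/rotate of operand D.  The count is taken
   from register S, or is already in T1 when S == OR_TMP1.  */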
1834 static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
1835 {
1836     if (s != OR_TMP1)
1837         gen_op_mov_v_reg(s1, ot, s1->T1, s);
1838     switch(op) {
1839     case OP_ROL:
1840         gen_rot_rm_T1(s1, ot, d, 0);
1841         break;
1842     case OP_ROR:
1843         gen_rot_rm_T1(s1, ot, d, 1);
1844         break;
1845     case OP_SHL:
1846     case OP_SHL1:
1847         gen_shift_rm_T1(s1, ot, d, 0, 0);
1848         break;
1849     case OP_SHR:
1850         gen_shift_rm_T1(s1, ot, d, 1, 0);
1851         break;
1852     case OP_SAR:
1853         gen_shift_rm_T1(s1, ot, d, 1, 1);
1854         break;
1855     case OP_RCL:
1856         gen_rotc_rm_T1(s1, ot, d, 0);
1857         break;
1858     case OP_RCR:
1859         gen_rotc_rm_T1(s1, ot, d, 1);
1860         break;
1861     }
1862 }
1863 
1864 static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
1865 {
1866     switch(op) {
1867     case OP_ROL:
1868         gen_rot_rm_im(s1, ot, d, c, 0);
1869         break;
1870     case OP_ROR:
1871         gen_rot_rm_im(s1, ot, d, c, 1);
1872         break;
1873     case OP_SHL:
1874     case OP_SHL1:
1875         gen_shift_rm_im(s1, ot, d, c, 0, 0);
1876         break;
1877     case OP_SHR:
1878         gen_shift_rm_im(s1, ot, d, c, 1, 0);
1879         break;
1880     case OP_SAR:
1881         gen_shift_rm_im(s1, ot, d, c, 1, 1);
1882         break;
1883     default:
1884         /* currently not optimized */
1885         tcg_gen_movi_tl(s1->T1, c);
1886         gen_shift(s1, op, ot, d, OR_TMP1);
1887         break;
1888     }
1889 }
1890 
1891 #define X86_MAX_INSN_LENGTH 15
1892 
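/* Advance s->pc by NUM_BYTES and return its old value.  Decoding of an
 * over-long instruction is aborted via siglongjmp so that the decoder
 * can raise the general protection fault; see the page-fault subtlety
 * described below.
 */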
1893 static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
1894 {
1895     uint64_t pc = s->pc;
1896 
1897     s->pc += num_bytes;
1898     if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
1899         /* If the instruction's 16th byte is on a different page than the 1st, a
1900          * page fault on the second page wins over the general protection fault
1901          * caused by the instruction being too long.
1902          * This can happen even if the operand is only one byte long!
1903          */
1904         if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
1905             volatile uint8_t unused =
1906                 cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
1907             (void) unused;
1908         }
1909         siglongjmp(s->jmpbuf, 1);
1910     }
1911 
1912     return pc;
1913 }
1914 
1915 static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
1916 {
1917     return translator_ldub(env, advance_pc(env, s, 1));
1918 }
1919 
1920 static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
1921 {
1922     return translator_ldsw(env, advance_pc(env, s, 2));
1923 }
1924 
1925 static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
1926 {
1927     return translator_lduw(env, advance_pc(env, s, 2));
1928 }
1929 
1930 static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
1931 {
1932     return translator_ldl(env, advance_pc(env, s, 4));
1933 }
1934 
1935 #ifdef TARGET_X86_64
1936 static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
1937 {
1938     return translator_ldq(env, advance_pc(env, s, 8));
1939 }
1940 #endif
1941 
1942 /* Decompose an address.  */
1943 
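/* BASE and INDEX are register numbers, or -1 when absent; BASE == -2
   marks a rip-relative address.  SCALE is the log2 of the index factor
   and DEF_SEG the default segment, subject to override.  */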
1944 typedef struct AddressParts {
1945     int def_seg;
1946     int base;
1947     int index;
1948     int scale;
1949     target_long disp;
1950 } AddressParts;
1951 
1952 static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
1953                                     int modrm)
1954 {
1955     int def_seg, base, index, scale, mod, rm;
1956     target_long disp;
1957     bool havesib;
1958 
1959     def_seg = R_DS;
1960     index = -1;
1961     scale = 0;
1962     disp = 0;
1963 
1964     mod = (modrm >> 6) & 3;
1965     rm = modrm & 7;
1966     base = rm | REX_B(s);
1967 
1968     if (mod == 3) {
1969         /* Normally filtered out earlier, but including this path
1970            simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
1971         goto done;
1972     }
1973 
1974     switch (s->aflag) {
1975     case MO_64:
1976     case MO_32:
1977         havesib = 0;
1978         if (rm == 4) {
1979             int code = x86_ldub_code(env, s);
1980             scale = (code >> 6) & 3;
1981             index = ((code >> 3) & 7) | REX_X(s);
1982             if (index == 4) {
1983                 index = -1;  /* no index */
1984             }
1985             base = (code & 7) | REX_B(s);
1986             havesib = 1;
1987         }
1988 
1989         switch (mod) {
1990         case 0:
1991             if ((base & 7) == 5) {
1992                 base = -1;
1993                 disp = (int32_t)x86_ldl_code(env, s);
1994                 if (CODE64(s) && !havesib) {
1995                     base = -2;
1996                     disp += s->pc + s->rip_offset;
1997                 }
1998             }
1999             break;
2000         case 1:
2001             disp = (int8_t)x86_ldub_code(env, s);
2002             break;
2003         default:
2004         case 2:
2005             disp = (int32_t)x86_ldl_code(env, s);
2006             break;
2007         }
2008 
2009         /* For correct popl handling with esp.  */
2010         if (base == R_ESP && s->popl_esp_hack) {
2011             disp += s->popl_esp_hack;
2012         }
2013         if (base == R_EBP || base == R_ESP) {
2014             def_seg = R_SS;
2015         }
2016         break;
2017 
2018     case MO_16:
2019         if (mod == 0) {
2020             if (rm == 6) {
2021                 base = -1;
2022                 disp = x86_lduw_code(env, s);
2023                 break;
2024             }
2025         } else if (mod == 1) {
2026             disp = (int8_t)x86_ldub_code(env, s);
2027         } else {
2028             disp = (int16_t)x86_lduw_code(env, s);
2029         }
2030 
2031         switch (rm) {
2032         case 0:
2033             base = R_EBX;
2034             index = R_ESI;
2035             break;
2036         case 1:
2037             base = R_EBX;
2038             index = R_EDI;
2039             break;
2040         case 2:
2041             base = R_EBP;
2042             index = R_ESI;
2043             def_seg = R_SS;
2044             break;
2045         case 3:
2046             base = R_EBP;
2047             index = R_EDI;
2048             def_seg = R_SS;
2049             break;
2050         case 4:
2051             base = R_ESI;
2052             break;
2053         case 5:
2054             base = R_EDI;
2055             break;
2056         case 6:
2057             base = R_EBP;
2058             def_seg = R_SS;
2059             break;
2060         default:
2061         case 7:
2062             base = R_EBX;
2063             break;
2064         }
2065         break;
2066 
2067     default:
2068         tcg_abort();
2069     }
2070 
2071  done:
2072     return (AddressParts){ def_seg, base, index, scale, disp };
2073 }
2074 
2075 /* Compute the address, with a minimum number of TCG ops.  */
2076 static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2077 {
2078     TCGv ea = NULL;
2079 
2080     if (a.index >= 0) {
2081         if (a.scale == 0) {
2082             ea = cpu_regs[a.index];
2083         } else {
2084             tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2085             ea = s->A0;
2086         }
2087         if (a.base >= 0) {
2088             tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2089             ea = s->A0;
2090         }
2091     } else if (a.base >= 0) {
2092         ea = cpu_regs[a.base];
2093     }
2094     if (!ea) {
2095         tcg_gen_movi_tl(s->A0, a.disp);
2096         ea = s->A0;
2097     } else if (a.disp != 0) {
2098         tcg_gen_addi_tl(s->A0, ea, a.disp);
2099         ea = s->A0;
2100     }
2101 
2102     return ea;
2103 }
2104 
2105 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2106 {
2107     AddressParts a = gen_lea_modrm_0(env, s, modrm);
2108     TCGv ea = gen_lea_modrm_1(s, a);
2109     gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2110 }
2111 
2112 static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2113 {
2114     (void)gen_lea_modrm_0(env, s, modrm);
2115 }
2116 
2117 /* Used for BNDCL, BNDCU, BNDCN.  */
2118 static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2119                       TCGCond cond, TCGv_i64 bndv)
2120 {
2121     TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
2122 
2123     tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
2124     if (!CODE64(s)) {
2125         tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
2126     }
2127     tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
2128     tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
2129     gen_helper_bndck(cpu_env, s->tmp2_i32);
2130 }
2131 
2132 /* used for LEA and MOV AX, mem */
2133 static void gen_add_A0_ds_seg(DisasContext *s)
2134 {
2135     gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
2136 }
2137 
2138 /* Generate a modrm memory load or store of 'reg'.  T0 is used if reg ==
2139    OR_TMP0. */
2140 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2141                            MemOp ot, int reg, int is_store)
2142 {
2143     int mod, rm;
2144 
2145     mod = (modrm >> 6) & 3;
2146     rm = (modrm & 7) | REX_B(s);
2147     if (mod == 3) {
2148         if (is_store) {
2149             if (reg != OR_TMP0)
2150                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2151             gen_op_mov_reg_v(s, ot, rm, s->T0);
2152         } else {
2153             gen_op_mov_v_reg(s, ot, s->T0, rm);
2154             if (reg != OR_TMP0)
2155                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2156         }
2157     } else {
2158         gen_lea_modrm(env, s, modrm);
2159         if (is_store) {
2160             if (reg != OR_TMP0)
2161                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2162             gen_op_st_v(s, ot, s->T0, s->A0);
2163         } else {
2164             gen_op_ld_v(s, ot, s->T0, s->A0);
2165             if (reg != OR_TMP0)
2166                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2167         }
2168     }
2169 }
2170 
2171 static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot)
2172 {
2173     uint32_t ret;
2174 
2175     switch (ot) {
2176     case MO_8:
2177         ret = x86_ldub_code(env, s);
2178         break;
2179     case MO_16:
2180         ret = x86_lduw_code(env, s);
2181         break;
2182     case MO_32:
2183 #ifdef TARGET_X86_64
2184     case MO_64:
2185 #endif
2186         ret = x86_ldl_code(env, s);
2187         break;
2188     default:
2189         tcg_abort();
2190     }
2191     return ret;
2192 }
2193 
2194 static inline int insn_const_size(MemOp ot)
2195 {
2196     if (ot <= MO_32) {
2197         return 1 << ot;
2198     } else {
2199         return 4;
2200     }
2201 }
2202 
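/* Direct block chaining is only safe while the guest page mapping
   cannot change under us: the target must lie on the same page as the
   TB itself, or on the same page as the current instruction.  */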
2203 static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
2204 {
2205 #ifndef CONFIG_USER_ONLY
2206     return (pc & TARGET_PAGE_MASK) == (s->base.tb->pc & TARGET_PAGE_MASK) ||
2207            (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
2208 #else
2209     return true;
2210 #endif
2211 }
2212 
2213 static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2214 {
2215     target_ulong pc = s->cs_base + eip;
2216 
2217     if (use_goto_tb(s, pc))  {
2218         /* jump to same page: we can use a direct jump */
2219         tcg_gen_goto_tb(tb_num);
2220         gen_jmp_im(s, eip);
2221         tcg_gen_exit_tb(s->base.tb, tb_num);
2222         s->base.is_jmp = DISAS_NORETURN;
2223     } else {
2224         /* jump to another page */
2225         gen_jmp_im(s, eip);
2226         gen_jr(s, s->tmp0);
2227     }
2228 }
2229 
2230 static inline void gen_jcc(DisasContext *s, int b,
2231                            target_ulong val, target_ulong next_eip)
2232 {
2233     TCGLabel *l1, *l2;
2234 
2235     if (s->jmp_opt) {
2236         l1 = gen_new_label();
2237         gen_jcc1(s, b, l1);
2238 
2239         gen_goto_tb(s, 0, next_eip);
2240 
2241         gen_set_label(l1);
2242         gen_goto_tb(s, 1, val);
2243     } else {
2244         l1 = gen_new_label();
2245         l2 = gen_new_label();
2246         gen_jcc1(s, b, l1);
2247 
2248         gen_jmp_im(s, next_eip);
2249         tcg_gen_br(l2);
2250 
2251         gen_set_label(l1);
2252         gen_jmp_im(s, val);
2253         gen_set_label(l2);
2254         gen_eob(s);
2255     }
2256 }
2257 
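/* CMOVcc: always read the r/m operand, then conditionally replace the
   destination register with it.  */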
2258 static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b,
2259                         int modrm, int reg)
2260 {
2261     CCPrepare cc;
2262 
2263     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2264 
2265     cc = gen_prepare_cc(s, b, s->T1);
2266     if (cc.mask != -1) {
2267         TCGv t0 = tcg_temp_new();
2268         tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2269         cc.reg = t0;
2270     }
2271     if (!cc.use_reg2) {
2272         cc.reg2 = tcg_const_tl(cc.imm);
2273     }
2274 
2275     tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
2276                        s->T0, cpu_regs[reg]);
2277     gen_op_mov_reg_v(s, ot, reg, s->T0);
2278 
2279     if (cc.mask != -1) {
2280         tcg_temp_free(cc.reg);
2281     }
2282     if (!cc.use_reg2) {
2283         tcg_temp_free(cc.reg2);
2284     }
2285 }
2286 
2287 static inline void gen_op_movl_T0_seg(DisasContext *s, int seg_reg)
2288 {
2289     tcg_gen_ld32u_tl(s->T0, cpu_env,
2290                      offsetof(CPUX86State,segs[seg_reg].selector));
2291 }
2292 
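/* Real/vm86 mode segment load: set the selector and derive the segment
   base as selector << 4; no descriptor or privilege checks apply.  */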
2293 static inline void gen_op_movl_seg_T0_vm(DisasContext *s, int seg_reg)
2294 {
2295     tcg_gen_ext16u_tl(s->T0, s->T0);
2296     tcg_gen_st32_tl(s->T0, cpu_env,
2297                     offsetof(CPUX86State,segs[seg_reg].selector));
2298     tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
2299 }
2300 
2301 /* Move T0 to seg_reg and determine whether the CPU state may change.
2302    Never call this function with seg_reg == R_CS. */
2303 static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
2304 {
2305     if (s->pe && !s->vm86) {
2306         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
2307         gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
2308         /* Abort translation because the addseg value may change or
2309            because ss32 may change.  For R_SS, translation must always
2310            stop, as special handling is needed to inhibit hardware
2311            interrupts for the next instruction.  */
2312         if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS)) {
2313             s->base.is_jmp = DISAS_TOO_MANY;
2314         }
2315     } else {
2316         gen_op_movl_seg_T0_vm(s, seg_reg);
2317         if (seg_reg == R_SS) {
2318             s->base.is_jmp = DISAS_TOO_MANY;
2319         }
2320     }
2321 }
2322 
2323 static inline int svm_is_rep(int prefixes)
2324 {
2325     return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2326 }
2327 
2328 static inline void
2329 gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2330                               uint32_t type, uint64_t param)
2331 {
2332     /* no SVM activated; fast case */
2333     if (likely(!(s->flags & HF_GUEST_MASK)))
2334         return;
2335     gen_update_cc_op(s);
2336     gen_jmp_im(s, pc_start - s->cs_base);
2337     gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2338                                          tcg_const_i64(param));
2339 }
2340 
2341 static inline void
2342 gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2343 {
2344     gen_svm_check_intercept_param(s, pc_start, type, 0);
2345 }
2346 
2347 static inline void gen_stack_update(DisasContext *s, int addend)
2348 {
2349     gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
2350 }
2351 
2352 /* Generate a push. It depends on ss32, addseg and dflag.  */
2353 static void gen_push_v(DisasContext *s, TCGv val)
2354 {
2355     MemOp d_ot = mo_pushpop(s, s->dflag);
2356     MemOp a_ot = mo_stacksize(s);
2357     int size = 1 << d_ot;
2358     TCGv new_esp = s->A0;
2359 
2360     tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
2361 
2362     if (!CODE64(s)) {
2363         if (s->addseg) {
2364             new_esp = s->tmp4;
2365             tcg_gen_mov_tl(new_esp, s->A0);
2366         }
2367         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2368     }
2369 
2370     gen_op_st_v(s, d_ot, val, s->A0);
2371     gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
2372 }
2373 
2374 /* A two-step pop is necessary for precise exceptions. */
2375 static MemOp gen_pop_T0(DisasContext *s)
2376 {
2377     MemOp d_ot = mo_pushpop(s, s->dflag);
2378 
2379     gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2380     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2381 
2382     return d_ot;
2383 }
2384 
2385 static inline void gen_pop_update(DisasContext *s, MemOp ot)
2386 {
2387     gen_stack_update(s, 1 << ot);
2388 }
2389 
2390 static inline void gen_stack_A0(DisasContext *s)
2391 {
2392     gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2393 }
2394 
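/* PUSHA/PUSHAD: store all eight GPRs below the stack pointer (the ESP
   image stored is its value before the instruction), then lower ESP by
   eight slots.  */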
2395 static void gen_pusha(DisasContext *s)
2396 {
2397     MemOp s_ot = s->ss32 ? MO_32 : MO_16;
2398     MemOp d_ot = s->dflag;
2399     int size = 1 << d_ot;
2400     int i;
2401 
2402     for (i = 0; i < 8; i++) {
2403         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2404         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2405         gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2406     }
2407 
2408     gen_stack_update(s, -8 * size);
2409 }
2410 
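/* POPA/POPAD: reload the GPRs from the stack, except that the on-stack
   ESP image is ignored; ESP is simply advanced past all eight slots.  */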
2411 static void gen_popa(DisasContext *s)
2412 {
2413     MemOp s_ot = s->ss32 ? MO_32 : MO_16;
2414     MemOp d_ot = s->dflag;
2415     int size = 1 << d_ot;
2416     int i;
2417 
2418     for (i = 0; i < 8; i++) {
2419         /* ESP is not reloaded */
2420         if (7 - i == R_ESP) {
2421             continue;
2422         }
2423         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2424         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2425         gen_op_ld_v(s, d_ot, s->T0, s->A0);
2426         gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2427     }
2428 
2429     gen_stack_update(s, 8 * size);
2430 }
2431 
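/* ENTER: push EBP, copy LEVEL-1 outer frame pointers from the old
   frame, push the new frame pointer itself, point EBP at it, and
   finally reserve ESP_ADDEND bytes of locals below.  */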
2432 static void gen_enter(DisasContext *s, int esp_addend, int level)
2433 {
2434     MemOp d_ot = mo_pushpop(s, s->dflag);
2435     MemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
2436     int size = 1 << d_ot;
2437 
2438     /* Push BP; compute FrameTemp into T1.  */
2439     tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
2440     gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
2441     gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
2442 
2443     level &= 31;
2444     if (level != 0) {
2445         int i;
2446 
2447         /* Copy level-1 pointers from the previous frame.  */
2448         for (i = 1; i < level; ++i) {
2449             tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
2450             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2451             gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
2452 
2453             tcg_gen_subi_tl(s->A0, s->T1, size * i);
2454             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2455             gen_op_st_v(s, d_ot, s->tmp0, s->A0);
2456         }
2457 
2458         /* Push the current FrameTemp as the last level.  */
2459         tcg_gen_subi_tl(s->A0, s->T1, size * level);
2460         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2461         gen_op_st_v(s, d_ot, s->T1, s->A0);
2462     }
2463 
2464     /* Copy the FrameTemp value to EBP.  */
2465     gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
2466 
2467     /* Compute the final value of ESP.  */
2468     tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
2469     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2470 }
2471 
2472 static void gen_leave(DisasContext *s)
2473 {
2474     MemOp d_ot = mo_pushpop(s, s->dflag);
2475     MemOp a_ot = mo_stacksize(s);
2476 
2477     gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2478     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2479 
2480     tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
2481 
2482     gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
2483     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2484 }
2485 
2486 /* Similarly, except that the assumption here is that we don't decode
2487    the instruction at all -- either a missing opcode, an unimplemented
2488    feature, or just a bogus instruction stream.  */
2489 static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2490 {
2491     gen_illegal_opcode(s);
2492 
2493     if (qemu_loglevel_mask(LOG_UNIMP)) {
2494         FILE *logfile = qemu_log_lock();
2495         target_ulong pc = s->pc_start, end = s->pc;
2496 
2497         qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2498         for (; pc < end; ++pc) {
2499             qemu_log(" %02x", cpu_ldub_code(env, pc));
2500         }
2501         qemu_log("\n");
2502         qemu_log_unlock(logfile);
2503     }
2504 }
2505 
2506 /* an interrupt is different from an exception because of the
2507    privilege checks */
2508 static void gen_interrupt(DisasContext *s, int intno,
2509                           target_ulong cur_eip, target_ulong next_eip)
2510 {
2511     gen_update_cc_op(s);
2512     gen_jmp_im(s, cur_eip);
2513     gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2514                                tcg_const_i32(next_eip - cur_eip));
2515     s->base.is_jmp = DISAS_NORETURN;
2516 }
2517 
2518 static void gen_debug(DisasContext *s, target_ulong cur_eip)
2519 {
2520     gen_update_cc_op(s);
2521     gen_jmp_im(s, cur_eip);
2522     gen_helper_debug(cpu_env);
2523     s->base.is_jmp = DISAS_NORETURN;
2524 }
2525 
2526 static void gen_set_hflag(DisasContext *s, uint32_t mask)
2527 {
2528     if ((s->flags & mask) == 0) {
2529         TCGv_i32 t = tcg_temp_new_i32();
2530         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2531         tcg_gen_ori_i32(t, t, mask);
2532         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2533         tcg_temp_free_i32(t);
2534         s->flags |= mask;
2535     }
2536 }
2537 
2538 static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2539 {
2540     if (s->flags & mask) {
2541         TCGv_i32 t = tcg_temp_new_i32();
2542         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2543         tcg_gen_andi_i32(t, t, ~mask);
2544         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2545         tcg_temp_free_i32(t);
2546         s->flags &= ~mask;
2547     }
2548 }
2549 
2550 /* Clear BND registers during legacy branches.  */
2551 static void gen_bnd_jmp(DisasContext *s)
2552 {
2553     /* Clear the registers only if BND prefix is missing, MPX is enabled,
2554        and if the BNDREGs are known to be in use (non-zero) already.
2555        The helper itself will check BNDPRESERVE at runtime.  */
2556     if ((s->prefix & PREFIX_REPNZ) == 0
2557         && (s->flags & HF_MPX_EN_MASK) != 0
2558         && (s->flags & HF_MPX_IU_MASK) != 0) {
2559         gen_helper_bnd_jmp(cpu_env);
2560     }
2561 }
2562 
2563 /* Generate an end of block.  A trace exception is also generated if needed.
2564    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2565    If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2566    S->TF.  This is used by the syscall/sysret insns.  */
2567 static void
2568 do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2569 {
2570     gen_update_cc_op(s);
2571 
2572     /* If several instructions disable interrupts, only the first does it.  */
2573     if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2574         gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2575     } else {
2576         gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2577     }
2578 
2579     if (s->base.tb->flags & HF_RF_MASK) {
2580         gen_helper_reset_rf(cpu_env);
2581     }
2582     if (s->base.singlestep_enabled) {
2583         gen_helper_debug(cpu_env);
2584     } else if (recheck_tf) {
2585         gen_helper_rechecking_single_step(cpu_env);
2586         tcg_gen_exit_tb(NULL, 0);
2587     } else if (s->tf) {
2588         gen_helper_single_step(cpu_env);
2589     } else if (jr) {
2590         tcg_gen_lookup_and_goto_ptr();
2591     } else {
2592         tcg_gen_exit_tb(NULL, 0);
2593     }
2594     s->base.is_jmp = DISAS_NORETURN;
2595 }
2596 
2597 static inline void
2598 gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2599 {
2600     do_gen_eob_worker(s, inhibit, recheck_tf, false);
2601 }
2602 
2603 /* End of block.
2604    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2605 static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2606 {
2607     gen_eob_worker(s, inhibit, false);
2608 }
2609 
2610 /* End of block, resetting the inhibit irq flag.  */
2611 static void gen_eob(DisasContext *s)
2612 {
2613     gen_eob_worker(s, false, false);
2614 }
2615 
2616 /* Jump to register */
2617 static void gen_jr(DisasContext *s, TCGv dest)
2618 {
2619     do_gen_eob_worker(s, false, false, true);
2620 }
2621 
2622 /* Generate a jump to eip.  No segment change may happen beforehand, as
2623    a direct call to the next block may occur.  */
2624 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2625 {
2626     gen_update_cc_op(s);
2627     set_cc_op(s, CC_OP_DYNAMIC);
2628     if (s->jmp_opt) {
2629         gen_goto_tb(s, tb_num, eip);
2630     } else {
2631         gen_jmp_im(s, eip);
2632         gen_eob(s);
2633     }
2634 }
2635 
2636 static void gen_jmp(DisasContext *s, target_ulong eip)
2637 {
2638     gen_jmp_tb(s, eip, 0);
2639 }
2640 
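/* 64-bit and 128-bit moves between guest memory addressed by A0 and a
   CPUX86State field at OFFSET.  */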
2641 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2642 {
2643     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2644     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
2645 }
2646 
2647 static inline void gen_stq_env_A0(DisasContext *s, int offset)
2648 {
2649     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
2650     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2651 }
2652 
2653 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2654 {
2655     int mem_index = s->mem_index;
2656     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2657     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2658     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2659     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2660     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2661 }
2662 
2663 static inline void gen_sto_env_A0(DisasContext *s, int offset)
2664 {
2665     int mem_index = s->mem_index;
2666     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2667     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2668     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2669     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2670     tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2671 }
2672 
2673 static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
2674 {
2675     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2676     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2677     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2678     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2679 }
2680 
2681 static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
2682 {
2683     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
2684     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2685 }
2686 
2687 static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
2688 {
2689     tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
2690     tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
2691 }
2692 
2693 static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
2694 {
2695     tcg_gen_movi_i64(s->tmp1_i64, 0);
2696     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2697 }
2698 
2699 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2700 typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2701 typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2702 typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2703 typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2704 typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2705                                TCGv_i32 val);
2706 typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2707 typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2708                                TCGv val);
2709 
2710 #define SSE_SPECIAL ((void *)1)
2711 #define SSE_DUMMY ((void *)2)
2712 
2713 #define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2714 #define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2715                      gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2716 
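/* Operation table for the 0x0f two-byte opcode space.  The second
   index selects the mandatory prefix: 0 = none, 1 = 0x66, 2 = 0xf3,
   3 = 0xf2 (see the computation of b1 in gen_sse below).  */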
2717 static const SSEFunc_0_epp sse_op_table1[256][4] = {
2718     /* 3DNow! extensions */
2719     [0x0e] = { SSE_DUMMY }, /* femms */
2720     [0x0f] = { SSE_DUMMY }, /* pf... */
2721     /* pure SSE operations */
2722     [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2723     [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2724     [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2725     [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2726     [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2727     [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2728     [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2729     [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2730 
2731     [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2732     [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2733     [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2734     [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2735     [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2736     [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2737     [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2738     [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2739     [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2740     [0x51] = SSE_FOP(sqrt),
2741     [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2742     [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2743     [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2744     [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2745     [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2746     [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2747     [0x58] = SSE_FOP(add),
2748     [0x59] = SSE_FOP(mul),
2749     [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2750                gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2751     [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2752     [0x5c] = SSE_FOP(sub),
2753     [0x5d] = SSE_FOP(min),
2754     [0x5e] = SSE_FOP(div),
2755     [0x5f] = SSE_FOP(max),
2756 
2757     [0xc2] = SSE_FOP(cmpeq),
2758     [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2759                (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2760 
2761     /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2762     [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2763     [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2764 
2765     /* MMX ops and their SSE extensions */
2766     [0x60] = MMX_OP2(punpcklbw),
2767     [0x61] = MMX_OP2(punpcklwd),
2768     [0x62] = MMX_OP2(punpckldq),
2769     [0x63] = MMX_OP2(packsswb),
2770     [0x64] = MMX_OP2(pcmpgtb),
2771     [0x65] = MMX_OP2(pcmpgtw),
2772     [0x66] = MMX_OP2(pcmpgtl),
2773     [0x67] = MMX_OP2(packuswb),
2774     [0x68] = MMX_OP2(punpckhbw),
2775     [0x69] = MMX_OP2(punpckhwd),
2776     [0x6a] = MMX_OP2(punpckhdq),
2777     [0x6b] = MMX_OP2(packssdw),
2778     [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2779     [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2780     [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2781     [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2782     [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2783                (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2784                (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2785                (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2786     [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2787     [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2788     [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2789     [0x74] = MMX_OP2(pcmpeqb),
2790     [0x75] = MMX_OP2(pcmpeqw),
2791     [0x76] = MMX_OP2(pcmpeql),
2792     [0x77] = { SSE_DUMMY }, /* emms */
2793     [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2794     [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2795     [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2796     [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2797     [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2798     [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2799     [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2800     [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2801     [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2802     [0xd1] = MMX_OP2(psrlw),
2803     [0xd2] = MMX_OP2(psrld),
2804     [0xd3] = MMX_OP2(psrlq),
2805     [0xd4] = MMX_OP2(paddq),
2806     [0xd5] = MMX_OP2(pmullw),
2807     [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2808     [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2809     [0xd8] = MMX_OP2(psubusb),
2810     [0xd9] = MMX_OP2(psubusw),
2811     [0xda] = MMX_OP2(pminub),
2812     [0xdb] = MMX_OP2(pand),
2813     [0xdc] = MMX_OP2(paddusb),
2814     [0xdd] = MMX_OP2(paddusw),
2815     [0xde] = MMX_OP2(pmaxub),
2816     [0xdf] = MMX_OP2(pandn),
2817     [0xe0] = MMX_OP2(pavgb),
2818     [0xe1] = MMX_OP2(psraw),
2819     [0xe2] = MMX_OP2(psrad),
2820     [0xe3] = MMX_OP2(pavgw),
2821     [0xe4] = MMX_OP2(pmulhuw),
2822     [0xe5] = MMX_OP2(pmulhw),
2823     [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2824     [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2825     [0xe8] = MMX_OP2(psubsb),
2826     [0xe9] = MMX_OP2(psubsw),
2827     [0xea] = MMX_OP2(pminsw),
2828     [0xeb] = MMX_OP2(por),
2829     [0xec] = MMX_OP2(paddsb),
2830     [0xed] = MMX_OP2(paddsw),
2831     [0xee] = MMX_OP2(pmaxsw),
2832     [0xef] = MMX_OP2(pxor),
2833     [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2834     [0xf1] = MMX_OP2(psllw),
2835     [0xf2] = MMX_OP2(pslld),
2836     [0xf3] = MMX_OP2(psllq),
2837     [0xf4] = MMX_OP2(pmuludq),
2838     [0xf5] = MMX_OP2(pmaddwd),
2839     [0xf6] = MMX_OP2(psadbw),
2840     [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2841                (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2842     [0xf8] = MMX_OP2(psubb),
2843     [0xf9] = MMX_OP2(psubw),
2844     [0xfa] = MMX_OP2(psubl),
2845     [0xfb] = MMX_OP2(psubq),
2846     [0xfc] = MMX_OP2(paddb),
2847     [0xfd] = MMX_OP2(paddw),
2848     [0xfe] = MMX_OP2(paddl),
2849 };
2850 
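/* Shift-by-immediate group (opcodes 0x0f 0x71/0x72/0x73): each row of
   eight covers one opcode (word/dword/qword element size), indexed by
   the ModRM reg field; psrldq/pslldq exist only in the XMM column.  */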
2851 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2852     [0 + 2] = MMX_OP2(psrlw),
2853     [0 + 4] = MMX_OP2(psraw),
2854     [0 + 6] = MMX_OP2(psllw),
2855     [8 + 2] = MMX_OP2(psrld),
2856     [8 + 4] = MMX_OP2(psrad),
2857     [8 + 6] = MMX_OP2(pslld),
2858     [16 + 2] = MMX_OP2(psrlq),
2859     [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2860     [16 + 6] = MMX_OP2(psllq),
2861     [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2862 };
2863 
2864 static const SSEFunc_0_epi sse_op_table3ai[] = {
2865     gen_helper_cvtsi2ss,
2866     gen_helper_cvtsi2sd
2867 };
2868 
2869 #ifdef TARGET_X86_64
2870 static const SSEFunc_0_epl sse_op_table3aq[] = {
2871     gen_helper_cvtsq2ss,
2872     gen_helper_cvtsq2sd
2873 };
2874 #endif
2875 
2876 static const SSEFunc_i_ep sse_op_table3bi[] = {
2877     gen_helper_cvttss2si,
2878     gen_helper_cvtss2si,
2879     gen_helper_cvttsd2si,
2880     gen_helper_cvtsd2si
2881 };
2882 
2883 #ifdef TARGET_X86_64
2884 static const SSEFunc_l_ep sse_op_table3bq[] = {
2885     gen_helper_cvttss2sq,
2886     gen_helper_cvtss2sq,
2887     gen_helper_cvttsd2sq,
2888     gen_helper_cvtsd2sq
2889 };
2890 #endif
2891 
2892 static const SSEFunc_0_epp sse_op_table4[8][4] = {
2893     SSE_FOP(cmpeq),
2894     SSE_FOP(cmplt),
2895     SSE_FOP(cmple),
2896     SSE_FOP(cmpunord),
2897     SSE_FOP(cmpneq),
2898     SSE_FOP(cmpnlt),
2899     SSE_FOP(cmpnle),
2900     SSE_FOP(cmpord),
2901 };
2902 
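/* 3DNow! operations, indexed by the instruction's trailing opcode
   byte.  */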
2903 static const SSEFunc_0_epp sse_op_table5[256] = {
2904     [0x0c] = gen_helper_pi2fw,
2905     [0x0d] = gen_helper_pi2fd,
2906     [0x1c] = gen_helper_pf2iw,
2907     [0x1d] = gen_helper_pf2id,
2908     [0x8a] = gen_helper_pfnacc,
2909     [0x8e] = gen_helper_pfpnacc,
2910     [0x90] = gen_helper_pfcmpge,
2911     [0x94] = gen_helper_pfmin,
2912     [0x96] = gen_helper_pfrcp,
2913     [0x97] = gen_helper_pfrsqrt,
2914     [0x9a] = gen_helper_pfsub,
2915     [0x9e] = gen_helper_pfadd,
2916     [0xa0] = gen_helper_pfcmpgt,
2917     [0xa4] = gen_helper_pfmax,
2918     [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2919     [0xa7] = gen_helper_movq, /* pfrsqit1 */
2920     [0xaa] = gen_helper_pfsubr,
2921     [0xae] = gen_helper_pfacc,
2922     [0xb0] = gen_helper_pfcmpeq,
2923     [0xb4] = gen_helper_pfmul,
2924     [0xb6] = gen_helper_movq, /* pfrcpit2 */
2925     [0xb7] = gen_helper_pmulhrw_mmx,
2926     [0xbb] = gen_helper_pswapd,
2927     [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
2928 };
2929 
2930 struct SSEOpHelper_epp {
2931     SSEFunc_0_epp op[2];
2932     uint32_t ext_mask;
2933 };
2934 
2935 struct SSEOpHelper_eppi {
2936     SSEFunc_0_eppi op[2];
2937     uint32_t ext_mask;
2938 };
2939 
2940 #define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2941 #define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2942 #define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2943 #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2944 #define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2945         CPUID_EXT_PCLMULQDQ }
2946 #define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2947 
2948 static const struct SSEOpHelper_epp sse_op_table6[256] = {
2949     [0x00] = SSSE3_OP(pshufb),
2950     [0x01] = SSSE3_OP(phaddw),
2951     [0x02] = SSSE3_OP(phaddd),
2952     [0x03] = SSSE3_OP(phaddsw),
2953     [0x04] = SSSE3_OP(pmaddubsw),
2954     [0x05] = SSSE3_OP(phsubw),
2955     [0x06] = SSSE3_OP(phsubd),
2956     [0x07] = SSSE3_OP(phsubsw),
2957     [0x08] = SSSE3_OP(psignb),
2958     [0x09] = SSSE3_OP(psignw),
2959     [0x0a] = SSSE3_OP(psignd),
2960     [0x0b] = SSSE3_OP(pmulhrsw),
2961     [0x10] = SSE41_OP(pblendvb),
2962     [0x14] = SSE41_OP(blendvps),
2963     [0x15] = SSE41_OP(blendvpd),
2964     [0x17] = SSE41_OP(ptest),
2965     [0x1c] = SSSE3_OP(pabsb),
2966     [0x1d] = SSSE3_OP(pabsw),
2967     [0x1e] = SSSE3_OP(pabsd),
2968     [0x20] = SSE41_OP(pmovsxbw),
2969     [0x21] = SSE41_OP(pmovsxbd),
2970     [0x22] = SSE41_OP(pmovsxbq),
2971     [0x23] = SSE41_OP(pmovsxwd),
2972     [0x24] = SSE41_OP(pmovsxwq),
2973     [0x25] = SSE41_OP(pmovsxdq),
2974     [0x28] = SSE41_OP(pmuldq),
2975     [0x29] = SSE41_OP(pcmpeqq),
2976     [0x2a] = SSE41_SPECIAL, /* movntdqa */
2977     [0x2b] = SSE41_OP(packusdw),
2978     [0x30] = SSE41_OP(pmovzxbw),
2979     [0x31] = SSE41_OP(pmovzxbd),
2980     [0x32] = SSE41_OP(pmovzxbq),
2981     [0x33] = SSE41_OP(pmovzxwd),
2982     [0x34] = SSE41_OP(pmovzxwq),
2983     [0x35] = SSE41_OP(pmovzxdq),
2984     [0x37] = SSE42_OP(pcmpgtq),
2985     [0x38] = SSE41_OP(pminsb),
2986     [0x39] = SSE41_OP(pminsd),
2987     [0x3a] = SSE41_OP(pminuw),
2988     [0x3b] = SSE41_OP(pminud),
2989     [0x3c] = SSE41_OP(pmaxsb),
2990     [0x3d] = SSE41_OP(pmaxsd),
2991     [0x3e] = SSE41_OP(pmaxuw),
2992     [0x3f] = SSE41_OP(pmaxud),
2993     [0x40] = SSE41_OP(pmulld),
2994     [0x41] = SSE41_OP(phminposuw),
2995     [0xdb] = AESNI_OP(aesimc),
2996     [0xdc] = AESNI_OP(aesenc),
2997     [0xdd] = AESNI_OP(aesenclast),
2998     [0xde] = AESNI_OP(aesdec),
2999     [0xdf] = AESNI_OP(aesdeclast),
3000 };
3001 
3002 static const struct SSEOpHelper_eppi sse_op_table7[256] = {
3003     [0x08] = SSE41_OP(roundps),
3004     [0x09] = SSE41_OP(roundpd),
3005     [0x0a] = SSE41_OP(roundss),
3006     [0x0b] = SSE41_OP(roundsd),
3007     [0x0c] = SSE41_OP(blendps),
3008     [0x0d] = SSE41_OP(blendpd),
3009     [0x0e] = SSE41_OP(pblendw),
3010     [0x0f] = SSSE3_OP(palignr),
3011     [0x14] = SSE41_SPECIAL, /* pextrb */
3012     [0x15] = SSE41_SPECIAL, /* pextrw */
3013     [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
3014     [0x17] = SSE41_SPECIAL, /* extractps */
3015     [0x20] = SSE41_SPECIAL, /* pinsrb */
3016     [0x21] = SSE41_SPECIAL, /* insertps */
3017     [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
3018     [0x40] = SSE41_OP(dpps),
3019     [0x41] = SSE41_OP(dppd),
3020     [0x42] = SSE41_OP(mpsadbw),
3021     [0x44] = PCLMULQDQ_OP(pclmulqdq),
3022     [0x60] = SSE42_OP(pcmpestrm),
3023     [0x61] = SSE42_OP(pcmpestri),
3024     [0x62] = SSE42_OP(pcmpistrm),
3025     [0x63] = SSE42_OP(pcmpistri),
3026     [0xdf] = AESNI_OP(aeskeygenassist),
3027 };
3028 
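/* Translate one instruction from the 0x0f two-byte opcode space that
   the main decoder classified as MMX/SSE.  REX_R extends the ModRM reg
   field, and is applied only to XMM register numbers.  */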
3029 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3030                     target_ulong pc_start, int rex_r)
3031 {
3032     int b1, op1_offset, op2_offset, is_xmm, val;
3033     int modrm, mod, rm, reg;
3034     SSEFunc_0_epp sse_fn_epp;
3035     SSEFunc_0_eppi sse_fn_eppi;
3036     SSEFunc_0_ppi sse_fn_ppi;
3037     SSEFunc_0_eppt sse_fn_eppt;
3038     MemOp ot;
3039 
3040     b &= 0xff;
3041     if (s->prefix & PREFIX_DATA)
3042         b1 = 1;
3043     else if (s->prefix & PREFIX_REPZ)
3044         b1 = 2;
3045     else if (s->prefix & PREFIX_REPNZ)
3046         b1 = 3;
3047     else
3048         b1 = 0;
3049     sse_fn_epp = sse_op_table1[b][b1];
3050     if (!sse_fn_epp) {
3051         goto unknown_op;
3052     }
3053     if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3054         is_xmm = 1;
3055     } else {
3056         if (b1 == 0) {
3057             /* MMX case */
3058             is_xmm = 0;
3059         } else {
3060             is_xmm = 1;
3061         }
3062     }
3063     /* simple MMX/SSE operation */
3064     if (s->flags & HF_TS_MASK) {
3065         gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3066         return;
3067     }
3068     if (s->flags & HF_EM_MASK) {
3069     illegal_op:
3070         gen_illegal_opcode(s);
3071         return;
3072     }
3073     if (is_xmm
3074         && !(s->flags & HF_OSFXSR_MASK)
3075         && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) {
3076         goto unknown_op;
3077     }
3078     if (b == 0x0e) {
3079         if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3080             /* If we were fully decoding this we might use illegal_op.  */
3081             goto unknown_op;
3082         }
3083         /* femms */
3084         gen_helper_emms(cpu_env);
3085         return;
3086     }
3087     if (b == 0x77) {
3088         /* emms */
3089         gen_helper_emms(cpu_env);
3090         return;
3091     }
3092     /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3093        the static cpu state) */
3094     if (!is_xmm) {
3095         gen_helper_enter_mmx(cpu_env);
3096     }
3097 
3098     modrm = x86_ldub_code(env, s);
3099     reg = ((modrm >> 3) & 7);
3100     if (is_xmm)
3101         reg |= rex_r;
3102     mod = (modrm >> 6) & 3;
3103     if (sse_fn_epp == SSE_SPECIAL) {
3104         b |= (b1 << 8);
3105         switch(b) {
3106         case 0x0e7: /* movntq */
3107             if (mod == 3) {
3108                 goto illegal_op;
3109             }
3110             gen_lea_modrm(env, s, modrm);
3111             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3112             break;
3113         case 0x1e7: /* movntdq */
3114         case 0x02b: /* movntps */
3115         case 0x12b: /* movntpd */
3116             if (mod == 3)
3117                 goto illegal_op;
3118             gen_lea_modrm(env, s, modrm);
3119             gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3120             break;
3121         case 0x3f0: /* lddqu */
3122             if (mod == 3)
3123                 goto illegal_op;
3124             gen_lea_modrm(env, s, modrm);
3125             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3126             break;
3127         case 0x22b: /* movntss */
3128         case 0x32b: /* movntsd */
3129             if (mod == 3)
3130                 goto illegal_op;
3131             gen_lea_modrm(env, s, modrm);
3132             if (b1 & 1) {
3133                 gen_stq_env_A0(s, offsetof(CPUX86State,
3134                                            xmm_regs[reg].ZMM_Q(0)));
3135             } else {
3136                 tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3137                     xmm_regs[reg].ZMM_L(0)));
3138                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3139             }
3140             break;
3141         case 0x6e: /* movd mm, ea */
3142 #ifdef TARGET_X86_64
3143             if (s->dflag == MO_64) {
3144                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3145                 tcg_gen_st_tl(s->T0, cpu_env,
3146                               offsetof(CPUX86State, fpregs[reg].mmx));
3147             } else
3148 #endif
3149             {
3150                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3151                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3152                                  offsetof(CPUX86State,fpregs[reg].mmx));
3153                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3154                 gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3155             }
3156             break;
3157         case 0x16e: /* movd xmm, ea */
3158 #ifdef TARGET_X86_64
3159             if (s->dflag == MO_64) {
3160                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3161                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3162                                  offsetof(CPUX86State,xmm_regs[reg]));
3163                 gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3164             } else
3165 #endif
3166             {
3167                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3168                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3169                                  offsetof(CPUX86State,xmm_regs[reg]));
3170                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3171                 gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3172             }
3173             break;
3174         case 0x6f: /* movq mm, ea */
3175             if (mod != 3) {
3176                 gen_lea_modrm(env, s, modrm);
3177                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3178             } else {
3179                 rm = (modrm & 7);
3180                 tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3181                                offsetof(CPUX86State,fpregs[rm].mmx));
3182                 tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3183                                offsetof(CPUX86State,fpregs[reg].mmx));
3184             }
3185             break;
3186         case 0x010: /* movups */
3187         case 0x110: /* movupd */
3188         case 0x028: /* movaps */
3189         case 0x128: /* movapd */
3190         case 0x16f: /* movdqa xmm, ea */
3191         case 0x26f: /* movdqu xmm, ea */
3192             if (mod != 3) {
3193                 gen_lea_modrm(env, s, modrm);
3194                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3195             } else {
3196                 rm = (modrm & 7) | REX_B(s);
3197                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
3198                             offsetof(CPUX86State,xmm_regs[rm]));
3199             }
3200             break;
3201         case 0x210: /* movss xmm, ea */
3202             if (mod != 3) {
3203                 gen_lea_modrm(env, s, modrm);
3204                 gen_op_ld_v(s, MO_32, s->T0, s->A0);
3205                 tcg_gen_st32_tl(s->T0, cpu_env,
3206                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3207                 tcg_gen_movi_tl(s->T0, 0);
3208                 tcg_gen_st32_tl(s->T0, cpu_env,
3209                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3210                 tcg_gen_st32_tl(s->T0, cpu_env,
3211                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3212                 tcg_gen_st32_tl(s->T0, cpu_env,
3213                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3214             } else {
3215                 rm = (modrm & 7) | REX_B(s);
3216                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3217                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3218             }
3219             break;
3220         case 0x310: /* movsd xmm, ea */
3221             if (mod != 3) {
3222                 gen_lea_modrm(env, s, modrm);
3223                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3224                                            xmm_regs[reg].ZMM_Q(0)));
3225                 tcg_gen_movi_tl(s->T0, 0);
3226                 tcg_gen_st32_tl(s->T0, cpu_env,
3227                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3228                 tcg_gen_st32_tl(s->T0, cpu_env,
3229                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3230             } else {
3231                 rm = (modrm & 7) | REX_B(s);
3232                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3233                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3234             }
3235             break;
3236         case 0x012: /* movlps */
3237         case 0x112: /* movlpd */
3238             if (mod != 3) {
3239                 gen_lea_modrm(env, s, modrm);
3240                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3241                                            xmm_regs[reg].ZMM_Q(0)));
3242             } else {
3243                 /* movhlps */
3244                 rm = (modrm & 7) | REX_B(s);
3245                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3246                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3247             }
3248             break;
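             /* movsldup replicates source dwords 0 and 2 into dword pairs
                (0,1) and (2,3); movshdup further below replicates dwords
                1 and 3 instead. */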
3249         case 0x212: /* movsldup */
3250             if (mod != 3) {
3251                 gen_lea_modrm(env, s, modrm);
3252                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3253             } else {
3254                 rm = (modrm & 7) | REX_B(s);
3255                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3256                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3257                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3258                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3259             }
3260             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3261                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3262             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3263                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3264             break;
3265         case 0x312: /* movddup */
3266             if (mod != 3) {
3267                 gen_lea_modrm(env, s, modrm);
3268                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3269                                            xmm_regs[reg].ZMM_Q(0)));
3270             } else {
3271                 rm = (modrm & 7) | REX_B(s);
3272                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3273                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3274             }
3275             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3276                         offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3277             break;
3278         case 0x016: /* movhps */
3279         case 0x116: /* movhpd */
3280             if (mod != 3) {
3281                 gen_lea_modrm(env, s, modrm);
3282                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3283                                            xmm_regs[reg].ZMM_Q(1)));
3284             } else {
3285                 /* movlhps */
3286                 rm = (modrm & 7) | REX_B(s);
3287                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3288                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3289             }
3290             break;
3291         case 0x216: /* movshdup */
3292             if (mod != 3) {
3293                 gen_lea_modrm(env, s, modrm);
3294                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3295             } else {
3296                 rm = (modrm & 7) | REX_B(s);
3297                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3298                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3299                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3300                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3301             }
3302             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3303                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3304             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3305                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3306             break;
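             /* SSE4a extrq/insertq with immediate operands: two immediate
                bytes follow, the bit-field length and then the start
                index, which are passed to the helper as constants. */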
3307         case 0x178:
3308         case 0x378:
3309             {
3310                 int bit_index, field_length;
3311 
3312                 if (b1 == 1 && reg != 0)
3313                     goto illegal_op;
3314                 field_length = x86_ldub_code(env, s) & 0x3F;
3315                 bit_index = x86_ldub_code(env, s) & 0x3F;
3316                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3317                     offsetof(CPUX86State,xmm_regs[reg]));
3318                 if (b1 == 1)
3319                     gen_helper_extrq_i(cpu_env, s->ptr0,
3320                                        tcg_const_i32(bit_index),
3321                                        tcg_const_i32(field_length));
3322                 else
3323                     gen_helper_insertq_i(cpu_env, s->ptr0,
3324                                          tcg_const_i32(bit_index),
3325                                          tcg_const_i32(field_length));
3326             }
3327             break;
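             /* movd/movq to Ey: with REX.W the whole low quadword of the
                MMX/XMM register is stored, otherwise just the low 32
                bits. */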
3328         case 0x7e: /* movd ea, mm */
3329 #ifdef TARGET_X86_64
3330             if (s->dflag == MO_64) {
3331                 tcg_gen_ld_i64(s->T0, cpu_env,
3332                                offsetof(CPUX86State,fpregs[reg].mmx));
3333                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3334             } else
3335 #endif
3336             {
3337                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3338                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3339                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3340             }
3341             break;
3342         case 0x17e: /* movd ea, xmm */
3343 #ifdef TARGET_X86_64
3344             if (s->dflag == MO_64) {
3345                 tcg_gen_ld_i64(s->T0, cpu_env,
3346                                offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3347                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3348             } else
3349 #endif
3350             {
3351                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3352                                  offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3353                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3354             }
3355             break;
3356         case 0x27e: /* movq xmm, ea */
3357             if (mod != 3) {
3358                 gen_lea_modrm(env, s, modrm);
3359                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3360                                            xmm_regs[reg].ZMM_Q(0)));
3361             } else {
3362                 rm = (modrm & 7) | REX_B(s);
3363                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3364                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3365             }
3366             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3367             break;
3368         case 0x7f: /* movq ea, mm */
3369             if (mod != 3) {
3370                 gen_lea_modrm(env, s, modrm);
3371                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3372             } else {
3373                 rm = (modrm & 7);
3374                 gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
3375                             offsetof(CPUX86State,fpregs[reg].mmx));
3376             }
3377             break;
3378         case 0x011: /* movups */
3379         case 0x111: /* movupd */
3380         case 0x029: /* movaps */
3381         case 0x129: /* movapd */
3382         case 0x17f: /* movdqa ea, xmm */
3383         case 0x27f: /* movdqu ea, xmm */
3384             if (mod != 3) {
3385                 gen_lea_modrm(env, s, modrm);
3386                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3387             } else {
3388                 rm = (modrm & 7) | REX_B(s);
3389                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
3390                             offsetof(CPUX86State,xmm_regs[reg]));
3391             }
3392             break;
3393         case 0x211: /* movss ea, xmm */
3394             if (mod != 3) {
3395                 gen_lea_modrm(env, s, modrm);
3396                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3397                                  offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3398                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3399             } else {
3400                 rm = (modrm & 7) | REX_B(s);
3401                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
3402                             offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3403             }
3404             break;
3405         case 0x311: /* movsd ea, xmm */
3406             if (mod != 3) {
3407                 gen_lea_modrm(env, s, modrm);
3408                 gen_stq_env_A0(s, offsetof(CPUX86State,
3409                                            xmm_regs[reg].ZMM_Q(0)));
3410             } else {
3411                 rm = (modrm & 7) | REX_B(s);
3412                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3413                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3414             }
3415             break;
3416         case 0x013: /* movlps */
3417         case 0x113: /* movlpd */
3418             if (mod != 3) {
3419                 gen_lea_modrm(env, s, modrm);
3420                 gen_stq_env_A0(s, offsetof(CPUX86State,
3421                                            xmm_regs[reg].ZMM_Q(0)));
3422             } else {
3423                 goto illegal_op;
3424             }
3425             break;
3426         case 0x017: /* movhps */
3427         case 0x117: /* movhpd */
3428             if (mod != 3) {
3429                 gen_lea_modrm(env, s, modrm);
3430                 gen_stq_env_A0(s, offsetof(CPUX86State,
3431                                            xmm_regs[reg].ZMM_Q(1)));
3432             } else {
3433                 goto illegal_op;
3434             }
3435             break;
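             /* Vector shift by immediate: the immediate is widened to a
                64-bit count in mmx_t0/xmm_t0, and the ordinary
                variable-count shift helper from sse_op_table2 is applied
                to the register operand in place. */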
3436         case 0x71: /* shift mm, im */
3437         case 0x72:
3438         case 0x73:
3439         case 0x171: /* shift xmm, im */
3440         case 0x172:
3441         case 0x173:
3442             if (b1 >= 2) {
3443                 goto unknown_op;
3444             }
3445             val = x86_ldub_code(env, s);
3446             if (is_xmm) {
3447                 tcg_gen_movi_tl(s->T0, val);
3448                 tcg_gen_st32_tl(s->T0, cpu_env,
3449                                 offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3450                 tcg_gen_movi_tl(s->T0, 0);
3451                 tcg_gen_st32_tl(s->T0, cpu_env,
3452                                 offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
3453                 op1_offset = offsetof(CPUX86State,xmm_t0);
3454             } else {
3455                 tcg_gen_movi_tl(s->T0, val);
3456                 tcg_gen_st32_tl(s->T0, cpu_env,
3457                                 offsetof(CPUX86State, mmx_t0.MMX_L(0)));
3458                 tcg_gen_movi_tl(s->T0, 0);
3459                 tcg_gen_st32_tl(s->T0, cpu_env,
3460                                 offsetof(CPUX86State, mmx_t0.MMX_L(1)));
3461                 op1_offset = offsetof(CPUX86State,mmx_t0);
3462             }
3463             sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3464                                        ((modrm >> 3) & 7)][b1];
3465             if (!sse_fn_epp) {
3466                 goto unknown_op;
3467             }
3468             if (is_xmm) {
3469                 rm = (modrm & 7) | REX_B(s);
3470                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3471             } else {
3472                 rm = (modrm & 7);
3473                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3474             }
3475             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3476             tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
3477             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3478             break;
3479         case 0x050: /* movmskps */
3480             rm = (modrm & 7) | REX_B(s);
3481             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3482                              offsetof(CPUX86State,xmm_regs[rm]));
3483             gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
3484             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3485             break;
3486         case 0x150: /* movmskpd */
3487             rm = (modrm & 7) | REX_B(s);
3488             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3489                              offsetof(CPUX86State,xmm_regs[rm]));
3490             gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
3491             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3492             break;
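             /* MMX <-> XMM conversions: helper_enter_mmx resets the x87
                tag word and stack top so that the MMX register file can
                be used. */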
3493         case 0x02a: /* cvtpi2ps */
3494         case 0x12a: /* cvtpi2pd */
3495             gen_helper_enter_mmx(cpu_env);
3496             if (mod != 3) {
3497                 gen_lea_modrm(env, s, modrm);
3498                 op2_offset = offsetof(CPUX86State,mmx_t0);
3499                 gen_ldq_env_A0(s, op2_offset);
3500             } else {
3501                 rm = (modrm & 7);
3502                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3503             }
3504             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3505             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3506             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3507             switch(b >> 8) {
3508             case 0x0:
3509                 gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
3510                 break;
3511             default:
3512             case 0x1:
3513                 gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
3514                 break;
3515             }
3516             break;
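             /* cvtsi2ss/sd: the integer source is 32- or 64-bit wide
                according to REX.W; sse_op_table3a holds the ss and sd
                helpers for each width. */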
3517         case 0x22a: /* cvtsi2ss */
3518         case 0x32a: /* cvtsi2sd */
3519             ot = mo_64_32(s->dflag);
3520             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3521             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3522             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3523             if (ot == MO_32) {
3524                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3525                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3526                 sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
3527             } else {
3528 #ifdef TARGET_X86_64
3529                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3530                 sse_fn_epl(cpu_env, s->ptr0, s->T0);
3531 #else
3532                 goto illegal_op;
3533 #endif
3534             }
3535             break;
3536         case 0x02c: /* cvttps2pi */
3537         case 0x12c: /* cvttpd2pi */
3538         case 0x02d: /* cvtps2pi */
3539         case 0x12d: /* cvtpd2pi */
3540             gen_helper_enter_mmx(cpu_env);
3541             if (mod != 3) {
3542                 gen_lea_modrm(env, s, modrm);
3543                 op2_offset = offsetof(CPUX86State,xmm_t0);
3544                 gen_ldo_env_A0(s, op2_offset);
3545             } else {
3546                 rm = (modrm & 7) | REX_B(s);
3547                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3548             }
3549             op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3550             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3551             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3552             switch(b) {
3553             case 0x02c:
3554                 gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
3555                 break;
3556             case 0x12c:
3557                 gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
3558                 break;
3559             case 0x02d:
3560                 gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
3561                 break;
3562             case 0x12d:
3563                 gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
3564                 break;
3565             }
3566             break;
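             /* Scalar float-to-int conversions: table index bit 0 picks
                the current-rounding-mode form (2d) over truncation (2c),
                bit 1 picks the sd variant, and the bi/bq tables give the
                32- or 64-bit result per REX.W. */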
3567         case 0x22c: /* cvttss2si */
3568         case 0x32c: /* cvttsd2si */
3569         case 0x22d: /* cvtss2si */
3570         case 0x32d: /* cvtsd2si */
3571             ot = mo_64_32(s->dflag);
3572             if (mod != 3) {
3573                 gen_lea_modrm(env, s, modrm);
3574                 if ((b >> 8) & 1) {
3575                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3576                 } else {
3577                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
3578                     tcg_gen_st32_tl(s->T0, cpu_env,
3579                                     offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3580                 }
3581                 op2_offset = offsetof(CPUX86State,xmm_t0);
3582             } else {
3583                 rm = (modrm & 7) | REX_B(s);
3584                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3585             }
3586             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3587             if (ot == MO_32) {
3588                 SSEFunc_i_ep sse_fn_i_ep =
3589                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3590                 sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
3591                 tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
3592             } else {
3593 #ifdef TARGET_X86_64
3594                 SSEFunc_l_ep sse_fn_l_ep =
3595                     sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3596                 sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
3597 #else
3598                 goto illegal_op;
3599 #endif
3600             }
3601             gen_op_mov_reg_v(s, ot, reg, s->T0);
3602             break;
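             /* pinsrw: rip_offset accounts for the immediate byte that
                still follows the modrm/displacement bytes, so that
                RIP-relative addressing resolves to the instruction end. */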
3603         case 0xc4: /* pinsrw */
3604         case 0x1c4:
3605             s->rip_offset = 1;
3606             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3607             val = x86_ldub_code(env, s);
3608             if (b1) {
3609                 val &= 7;
3610                 tcg_gen_st16_tl(s->T0, cpu_env,
3611                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3612             } else {
3613                 val &= 3;
3614                 tcg_gen_st16_tl(s->T0, cpu_env,
3615                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3616             }
3617             break;
3618         case 0xc5: /* pextrw */
3619         case 0x1c5:
3620             if (mod != 3)
3621                 goto illegal_op;
3622             ot = mo_64_32(s->dflag);
3623             val = x86_ldub_code(env, s);
3624             if (b1) {
3625                 val &= 7;
3626                 rm = (modrm & 7) | REX_B(s);
3627                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3628                                  offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3629             } else {
3630                 val &= 3;
3631                 rm = (modrm & 7);
3632                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3633                                 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3634             }
3635             reg = ((modrm >> 3) & 7) | rex_r;
3636             gen_op_mov_reg_v(s, ot, reg, s->T0);
3637             break;
3638         case 0x1d6: /* movq ea, xmm */
3639             if (mod != 3) {
3640                 gen_lea_modrm(env, s, modrm);
3641                 gen_stq_env_A0(s, offsetof(CPUX86State,
3642                                            xmm_regs[reg].ZMM_Q(0)));
3643             } else {
3644                 rm = (modrm & 7) | REX_B(s);
3645                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3646                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3647                 gen_op_movq_env_0(s,
3648                                   offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
3649             }
3650             break;
3651         case 0x2d6: /* movq2dq */
3652             gen_helper_enter_mmx(cpu_env);
3653             rm = (modrm & 7);
3654             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3655                         offsetof(CPUX86State,fpregs[rm].mmx));
3656             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3657             break;
3658         case 0x3d6: /* movdq2q */
3659             gen_helper_enter_mmx(cpu_env);
3660             rm = (modrm & 7) | REX_B(s);
3661             gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
3662                         offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3663             break;
3664         case 0xd7: /* pmovmskb */
3665         case 0x1d7:
3666             if (mod != 3)
3667                 goto illegal_op;
3668             if (b1) {
3669                 rm = (modrm & 7) | REX_B(s);
3670                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3671                                  offsetof(CPUX86State, xmm_regs[rm]));
3672                 gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3673             } else {
3674                 rm = (modrm & 7);
3675                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3676                                  offsetof(CPUX86State, fpregs[rm].mmx));
3677                 gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
3678             }
3679             reg = ((modrm >> 3) & 7) | rex_r;
3680             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3681             break;
3682 
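         /* Three-byte 0f 38 opcodes, dispatched through sse_op_table6 by
            opcode byte and mandatory prefix (b1).  The 0f 38 f0..ff range
            holds general-purpose integer extensions and is handled at
            do_0f_38_fx below. */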
3683         case 0x138:
3684         case 0x038:
3685             b = modrm;
3686             if ((b & 0xf0) == 0xf0) {
3687                 goto do_0f_38_fx;
3688             }
3689             modrm = x86_ldub_code(env, s);
3690             rm = modrm & 7;
3691             reg = ((modrm >> 3) & 7) | rex_r;
3692             mod = (modrm >> 6) & 3;
3693             if (b1 >= 2) {
3694                 goto unknown_op;
3695             }
3696 
3697             sse_fn_epp = sse_op_table6[b].op[b1];
3698             if (!sse_fn_epp) {
3699                 goto unknown_op;
3700             }
3701             if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3702                 goto illegal_op;
3703 
3704             if (b1) {
3705                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3706                 if (mod == 3) {
3707                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3708                 } else {
3709                     op2_offset = offsetof(CPUX86State,xmm_t0);
3710                     gen_lea_modrm(env, s, modrm);
3711                     switch (b) {
3712                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3713                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3714                     case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3715                         gen_ldq_env_A0(s, op2_offset +
3716                                         offsetof(ZMMReg, ZMM_Q(0)));
3717                         break;
3718                     case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3719                     case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3720                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
3721                                             s->mem_index, MO_LEUL);
3722                         tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
3723                                         offsetof(ZMMReg, ZMM_L(0)));
3724                         break;
3725                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3726                         tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
3727                                            s->mem_index, MO_LEUW);
3728                         tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
3729                                         offsetof(ZMMReg, ZMM_W(0)));
3730                         break;
3731                     case 0x2a:            /* movntdqa */
3732                         gen_ldo_env_A0(s, op1_offset);
3733                         return;
3734                     default:
3735                         gen_ldo_env_A0(s, op2_offset);
3736                     }
3737                 }
3738             } else {
3739                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3740                 if (mod == 3) {
3741                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3742                 } else {
3743                     op2_offset = offsetof(CPUX86State,mmx_t0);
3744                     gen_lea_modrm(env, s, modrm);
3745                     gen_ldq_env_A0(s, op2_offset);
3746                 }
3747             }
3748             if (sse_fn_epp == SSE_SPECIAL) {
3749                 goto unknown_op;
3750             }
3751 
3752             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3753             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3754             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3755 
3756             if (b == 0x17) {
3757                 set_cc_op(s, CC_OP_EFLAGS);
3758             }
3759             break;
3760 
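         /* From here on b is (prefix class << 8) | opcode byte, so e.g.
            0x3f0 is the f2-prefixed 0f 38 f0 encoding, i.e. crc32
            Gd,Eb. */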
3761         case 0x238:
3762         case 0x338:
3763         do_0f_38_fx:
3764             /* Various integer extensions at 0f 38 f[0-f].  */
3765             b = modrm | (b1 << 8);
3766             modrm = x86_ldub_code(env, s);
3767             reg = ((modrm >> 3) & 7) | rex_r;
3768 
3769             switch (b) {
3770             case 0x3f0: /* crc32 Gd,Eb */
3771             case 0x3f1: /* crc32 Gd,Ey */
3772             do_crc32:
3773                 if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3774                     goto illegal_op;
3775                 }
3776                 if ((b & 0xff) == 0xf0) {
3777                     ot = MO_8;
3778                 } else if (s->dflag != MO_64) {
3779                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3780                 } else {
3781                     ot = MO_64;
3782                 }
3783 
3784                 tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
3785                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3786                 gen_helper_crc32(s->T0, s->tmp2_i32,
3787                                  s->T0, tcg_const_i32(8 << ot));
3788 
3789                 ot = mo_64_32(s->dflag);
3790                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3791                 break;
3792 
3793             case 0x1f0: /* crc32 or movbe */
3794             case 0x1f1:
3795                 /* For these insns, the f2 prefix is supposed to take
3796                    priority over the 66 prefix, but that is not how b1
3797                    was computed above, so check for it explicitly. */
3798                 if (s->prefix & PREFIX_REPNZ) {
3799                     goto do_crc32;
3800                 }
3801                 /* FALLTHRU */
3802             case 0x0f0: /* movbe Gy,My */
3803             case 0x0f1: /* movbe My,Gy */
3804                 if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3805                     goto illegal_op;
3806                 }
3807                 if (s->dflag != MO_64) {
3808                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3809                 } else {
3810                     ot = MO_64;
3811                 }
3812 
3813                 gen_lea_modrm(env, s, modrm);
3814                 if ((b & 1) == 0) {
3815                     tcg_gen_qemu_ld_tl(s->T0, s->A0,
3816                                        s->mem_index, ot | MO_BE);
3817                     gen_op_mov_reg_v(s, ot, reg, s->T0);
3818                 } else {
3819                     tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
3820                                        s->mem_index, ot | MO_BE);
3821                 }
3822                 break;
3823 
3824             case 0x0f2: /* andn Gy, By, Ey */
3825                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3826                     || !(s->prefix & PREFIX_VEX)
3827                     || s->vex_l != 0) {
3828                     goto illegal_op;
3829                 }
3830                 ot = mo_64_32(s->dflag);
3831                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3832                 tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
3833                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3834                 gen_op_update1_cc(s);
3835                 set_cc_op(s, CC_OP_LOGICB + ot);
3836                 break;
3837 
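                 /* bextr: By holds the control word, bits 7:0 the start
                    bit and bits 15:8 the field length. */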
3838             case 0x0f7: /* bextr Gy, Ey, By */
3839                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3840                     || !(s->prefix & PREFIX_VEX)
3841                     || s->vex_l != 0) {
3842                     goto illegal_op;
3843                 }
3844                 ot = mo_64_32(s->dflag);
3845                 {
3846                     TCGv bound, zero;
3847 
3848                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3849                     /* Extract START, and shift the operand.
3850                        Shifts larger than operand size get zeros.  */
3851                     tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
3852                     tcg_gen_shr_tl(s->T0, s->T0, s->A0);
3853 
3854                     bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3855                     zero = tcg_const_tl(0);
3856                     tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
3857                                        s->T0, zero);
3858                     tcg_temp_free(zero);
3859 
3860                     /* Extract the LEN into a mask.  Lengths larger than
3861                        operand size get all ones.  */
3862                     tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
3863                     tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
3864                                        s->A0, bound);
3865                     tcg_temp_free(bound);
3866                     tcg_gen_movi_tl(s->T1, 1);
3867                     tcg_gen_shl_tl(s->T1, s->T1, s->A0);
3868                     tcg_gen_subi_tl(s->T1, s->T1, 1);
3869                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
3870 
3871                     gen_op_mov_reg_v(s, ot, reg, s->T0);
3872                     gen_op_update1_cc(s);
3873                     set_cc_op(s, CC_OP_LOGICB + ot);
3874                 }
3875                 break;
3876 
3877             case 0x0f5: /* bzhi Gy, Ey, By */
3878                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3879                     || !(s->prefix & PREFIX_VEX)
3880                     || s->vex_l != 0) {
3881                     goto illegal_op;
3882                 }
3883                 ot = mo_64_32(s->dflag);
3884                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3885                 tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
3886                 {
3887                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3888                     /* Note that since we're using BMILG (in order to get O
3889                        cleared) we need to store the inverse into C.  */
3890                     tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3891                                        s->T1, bound);
3892                     tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
3893                                        bound, bound, s->T1);
3894                     tcg_temp_free(bound);
3895                 }
3896                 tcg_gen_movi_tl(s->A0, -1);
3897                 tcg_gen_shl_tl(s->A0, s->A0, s->T1);
3898                 tcg_gen_andc_tl(s->T0, s->T0, s->A0);
3899                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3900                 gen_op_update1_cc(s);
3901                 set_cc_op(s, CC_OP_BMILGB + ot);
3902                 break;
3903 
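                 /* mulx: unsigned rDX * Ey, low half to By, high half to
                    Gy.  Unlike mul it leaves the flags untouched, so no
                    cc_op update is needed. */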
3904             case 0x3f6: /* mulx By, Gy, rdx, Ey */
3905                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3906                     || !(s->prefix & PREFIX_VEX)
3907                     || s->vex_l != 0) {
3908                     goto illegal_op;
3909                 }
3910                 ot = mo_64_32(s->dflag);
3911                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3912                 switch (ot) {
3913                 default:
3914                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3915                     tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
3916                     tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
3917                                       s->tmp2_i32, s->tmp3_i32);
3918                     tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
3919                     tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
3920                     break;
3921 #ifdef TARGET_X86_64
3922                 case MO_64:
3923                     tcg_gen_mulu2_i64(s->T0, s->T1,
3924                                       s->T0, cpu_regs[R_EDX]);
3925                     tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
3926                     tcg_gen_mov_i64(cpu_regs[reg], s->T1);
3927                     break;
3928 #endif
3929                 }
3930                 break;
3931 
3932             case 0x3f5: /* pdep Gy, By, Ey */
3933                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3934                     || !(s->prefix & PREFIX_VEX)
3935                     || s->vex_l != 0) {
3936                     goto illegal_op;
3937                 }
3938                 ot = mo_64_32(s->dflag);
3939                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3940                 /* Note that by zero-extending the source operand, we
3941                    automatically handle zero-extending the result.  */
3942                 if (ot == MO_64) {
3943                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
3944                 } else {
3945                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
3946                 }
3947                 gen_helper_pdep(cpu_regs[reg], s->T1, s->T0);
3948                 break;
3949 
3950             case 0x2f5: /* pext Gy, By, Ey */
3951                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3952                     || !(s->prefix & PREFIX_VEX)
3953                     || s->vex_l != 0) {
3954                     goto illegal_op;
3955                 }
3956                 ot = mo_64_32(s->dflag);
3957                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3958                 /* Note that by zero-extending the source operand, we
3959                    automatically handle zero-extending the result.  */
3960                 if (ot == MO_64) {
3961                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
3962                 } else {
3963                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
3964                 }
3965                 gen_helper_pext(cpu_regs[reg], s->T1, s->T0);
3966                 break;
3967 
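                 /* adcx/adox add with carry-in taken from CF and OF
                    respectively.  The CC_OP_ADCX/ADOX/ADCOX states let a
                    chain of these insns hand the carry along in
                    cpu_cc_dst/cpu_cc_src2 without materializing EFLAGS in
                    between. */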
3968             case 0x1f6: /* adcx Gy, Ey */
3969             case 0x2f6: /* adox Gy, Ey */
3970                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
3971                     goto illegal_op;
3972                 } else {
3973                     TCGv carry_in, carry_out, zero;
3974                     int end_op;
3975 
3976                     ot = mo_64_32(s->dflag);
3977                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3978 
3979                     /* Re-use the carry-out from a previous round.  */
3980                     carry_in = NULL;
3981                     carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
3982                     switch (s->cc_op) {
3983                     case CC_OP_ADCX:
3984                         if (b == 0x1f6) {
3985                             carry_in = cpu_cc_dst;
3986                             end_op = CC_OP_ADCX;
3987                         } else {
3988                             end_op = CC_OP_ADCOX;
3989                         }
3990                         break;
3991                     case CC_OP_ADOX:
3992                         if (b == 0x1f6) {
3993                             end_op = CC_OP_ADCOX;
3994                         } else {
3995                             carry_in = cpu_cc_src2;
3996                             end_op = CC_OP_ADOX;
3997                         }
3998                         break;
3999                     case CC_OP_ADCOX:
4000                         end_op = CC_OP_ADCOX;
4001                         carry_in = carry_out;
4002                         break;
4003                     default:
4004                         end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
4005                         break;
4006                     }
4007                     /* If we can't reuse carry-out, get it out of EFLAGS.  */
4008                     if (!carry_in) {
4009                         if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
4010                             gen_compute_eflags(s);
4011                         }
4012                         carry_in = s->tmp0;
4013                         tcg_gen_extract_tl(carry_in, cpu_cc_src,
4014                                            ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
4015                     }
4016 
4017                     switch (ot) {
4018 #ifdef TARGET_X86_64
4019                     case MO_32:
4020                         /* If we know TL is 64-bit, and we want a 32-bit
4021                            result, just do everything in 64-bit arithmetic.  */
4022                         tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
4023                         tcg_gen_ext32u_i64(s->T0, s->T0);
4024                         tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
4025                         tcg_gen_add_i64(s->T0, s->T0, carry_in);
4026                         tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
4027                         tcg_gen_shri_i64(carry_out, s->T0, 32);
4028                         break;
4029 #endif
4030                     default:
4031                         /* Otherwise compute the carry-out in two steps.  */
4032                         zero = tcg_const_tl(0);
4033                         tcg_gen_add2_tl(s->T0, carry_out,
4034                                         s->T0, zero,
4035                                         carry_in, zero);
4036                         tcg_gen_add2_tl(cpu_regs[reg], carry_out,
4037                                         cpu_regs[reg], carry_out,
4038                                         s->T0, zero);
4039                         tcg_temp_free(zero);
4040                         break;
4041                     }
4042                     set_cc_op(s, end_op);
4043                 }
4044                 break;
4045 
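                 /* shlx/sarx/shrx: the count comes from By and is masked
                    to 5 or 6 bits by operand size; the flags are left
                    unchanged. */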
4046             case 0x1f7: /* shlx Gy, Ey, By */
4047             case 0x2f7: /* sarx Gy, Ey, By */
4048             case 0x3f7: /* shrx Gy, Ey, By */
4049                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4050                     || !(s->prefix & PREFIX_VEX)
4051                     || s->vex_l != 0) {
4052                     goto illegal_op;
4053                 }
4054                 ot = mo_64_32(s->dflag);
4055                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4056                 if (ot == MO_64) {
4057                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
4058                 } else {
4059                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
4060                 }
4061                 if (b == 0x1f7) {
4062                     tcg_gen_shl_tl(s->T0, s->T0, s->T1);
4063                 } else if (b == 0x2f7) {
4064                     if (ot != MO_64) {
4065                         tcg_gen_ext32s_tl(s->T0, s->T0);
4066                     }
4067                     tcg_gen_sar_tl(s->T0, s->T0, s->T1);
4068                 } else {
4069                     if (ot != MO_64) {
4070                         tcg_gen_ext32u_tl(s->T0, s->T0);
4071                     }
4072                     tcg_gen_shr_tl(s->T0, s->T0, s->T1);
4073                 }
4074                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4075                 break;
4076 
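                 /* VEX group 17 (blsr/blsmsk/blsi): the modrm reg field
                    selects the operation, the result goes to the vvvv
                    register, and the flags are set via CC_OP_BMILGB. */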
4077             case 0x0f3:
4078             case 0x1f3:
4079             case 0x2f3:
4080             case 0x3f3: /* Group 17 */
4081                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4082                     || !(s->prefix & PREFIX_VEX)
4083                     || s->vex_l != 0) {
4084                     goto illegal_op;
4085                 }
4086                 ot = mo_64_32(s->dflag);
4087                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4088 
4089                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
4090                 switch (reg & 7) {
4091                 case 1: /* blsr By,Ey */
4092                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4093                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4094                     break;
4095                 case 2: /* blsmsk By,Ey */
4096                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4097                     tcg_gen_xor_tl(s->T0, s->T0, s->T1);
4098                     break;
4099                 case 3: /* blsi By, Ey */
4100                     tcg_gen_neg_tl(s->T1, s->T0);
4101                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4102                     break;
4103                 default:
4104                     goto unknown_op;
4105                 }
4106                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4107                 gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
4108                 set_cc_op(s, CC_OP_BMILGB + ot);
4109                 break;
4110 
4111             default:
4112                 goto unknown_op;
4113             }
4114             break;
4115 
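         /* Three-byte 0f 3a opcodes: as for 0f 38, but with a trailing
            immediate byte (hence rip_offset = 1), dispatched through
            sse_op_table7. */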
4116         case 0x03a:
4117         case 0x13a:
4118             b = modrm;
4119             modrm = x86_ldub_code(env, s);
4120             rm = modrm & 7;
4121             reg = ((modrm >> 3) & 7) | rex_r;
4122             mod = (modrm >> 6) & 3;
4123             if (b1 >= 2) {
4124                 goto unknown_op;
4125             }
4126 
4127             sse_fn_eppi = sse_op_table7[b].op[b1];
4128             if (!sse_fn_eppi) {
4129                 goto unknown_op;
4130             }
4131             if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
4132                 goto illegal_op;
4133 
4134             s->rip_offset = 1;
4135 
4136             if (sse_fn_eppi == SSE_SPECIAL) {
4137                 ot = mo_64_32(s->dflag);
4138                 rm = (modrm & 7) | REX_B(s);
4139                 if (mod != 3)
4140                     gen_lea_modrm(env, s, modrm);
4141                 reg = ((modrm >> 3) & 7) | rex_r;
4142                 val = x86_ldub_code(env, s);
4143                 switch (b) {
4144                 case 0x14: /* pextrb */
4145                     tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4146                                             xmm_regs[reg].ZMM_B(val & 15)));
4147                     if (mod == 3) {
4148                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4149                     } else {
4150                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4151                                            s->mem_index, MO_UB);
4152                     }
4153                     break;
4154                 case 0x15: /* pextrw */
4155                     tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4156                                             xmm_regs[reg].ZMM_W(val & 7)));
4157                     if (mod == 3) {
4158                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4159                     } else {
4160                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4161                                            s->mem_index, MO_LEUW);
4162                     }
4163                     break;
4164                 case 0x16:
4165                     if (ot == MO_32) { /* pextrd */
4166                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4167                                         offsetof(CPUX86State,
4168                                                 xmm_regs[reg].ZMM_L(val & 3)));
4169                         if (mod == 3) {
4170                             tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
4171                         } else {
4172                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
4173                                                 s->mem_index, MO_LEUL);
4174                         }
4175                     } else { /* pextrq */
4176 #ifdef TARGET_X86_64
4177                         tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
4178                                         offsetof(CPUX86State,
4179                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4180                         if (mod == 3) {
4181                             tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
4182                         } else {
4183                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
4184                                                 s->mem_index, MO_LEQ);
4185                         }
4186 #else
4187                         goto illegal_op;
4188 #endif
4189                     }
4190                     break;
4191                 case 0x17: /* extractps */
4192                     tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4193                                             xmm_regs[reg].ZMM_L(val & 3)));
4194                     if (mod == 3) {
4195                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4196                     } else {
4197                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4198                                            s->mem_index, MO_LEUL);
4199                     }
4200                     break;
4201                 case 0x20: /* pinsrb */
4202                     if (mod == 3) {
4203                         gen_op_mov_v_reg(s, MO_32, s->T0, rm);
4204                     } else {
4205                         tcg_gen_qemu_ld_tl(s->T0, s->A0,
4206                                            s->mem_index, MO_UB);
4207                     }
4208                     tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
4209                                             xmm_regs[reg].ZMM_B(val & 15)));
4210                     break;
4211                 case 0x21: /* insertps */
4212                     if (mod == 3) {
4213                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4214                                         offsetof(CPUX86State,xmm_regs[rm]
4215                                                 .ZMM_L((val >> 6) & 3)));
4216                     } else {
4217                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4218                                             s->mem_index, MO_LEUL);
4219                     }
4220                     tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4221                                     offsetof(CPUX86State,xmm_regs[reg]
4222                                             .ZMM_L((val >> 4) & 3)));
4223                     if ((val >> 0) & 1)
4224                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4225                                         cpu_env, offsetof(CPUX86State,
4226                                                 xmm_regs[reg].ZMM_L(0)));
4227                     if ((val >> 1) & 1)
4228                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4229                                         cpu_env, offsetof(CPUX86State,
4230                                                 xmm_regs[reg].ZMM_L(1)));
4231                     if ((val >> 2) & 1)
4232                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4233                                         cpu_env, offsetof(CPUX86State,
4234                                                 xmm_regs[reg].ZMM_L(2)));
4235                     if ((val >> 3) & 1)
4236                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4237                                         cpu_env, offsetof(CPUX86State,
4238                                                 xmm_regs[reg].ZMM_L(3)));
4239                     break;
4240                 case 0x22:
4241                     if (ot == MO_32) { /* pinsrd */
4242                         if (mod == 3) {
4243                             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
4244                         } else {
4245                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4246                                                 s->mem_index, MO_LEUL);
4247                         }
4248                         tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4249                                         offsetof(CPUX86State,
4250                                                 xmm_regs[reg].ZMM_L(val & 3)));
4251                     } else { /* pinsrq */
4252 #ifdef TARGET_X86_64
4253                         if (mod == 3) {
4254                             gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
4255                         } else {
4256                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
4257                                                 s->mem_index, MO_LEQ);
4258                         }
4259                         tcg_gen_st_i64(s->tmp1_i64, cpu_env,
4260                                         offsetof(CPUX86State,
4261                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4262 #else
4263                         goto illegal_op;
4264 #endif
4265                     }
4266                     break;
4267                 }
4268                 return;
4269             }
4270 
4271             if (b1) {
4272                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4273                 if (mod == 3) {
4274                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4275                 } else {
4276                     op2_offset = offsetof(CPUX86State,xmm_t0);
4277                     gen_lea_modrm(env, s, modrm);
4278                     gen_ldo_env_A0(s, op2_offset);
4279                 }
4280             } else {
4281                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4282                 if (mod == 3) {
4283                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4284                 } else {
4285                     op2_offset = offsetof(CPUX86State,mmx_t0);
4286                     gen_lea_modrm(env, s, modrm);
4287                     gen_ldq_env_A0(s, op2_offset);
4288                 }
4289             }
4290             val = x86_ldub_code(env, s);
4291 
4292             if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4293                 set_cc_op(s, CC_OP_EFLAGS);
4294 
4295                 if (s->dflag == MO_64) {
4296                     /* The helper must use entire 64-bit gp registers */
4297                     val |= 1 << 8;
4298                 }
4299             }
4300 
4301             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4302             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4303             sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
4304             break;
4305 
4306         case 0x33a:
4307             /* Various integer extensions at 0f 3a f[0-f].  */
4308             b = modrm | (b1 << 8);
4309             modrm = x86_ldub_code(env, s);
4310             reg = ((modrm >> 3) & 7) | rex_r;
4311 
4312             switch (b) {
4313             case 0x3f0: /* rorx Gy,Ey, Ib */
4314                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4315                     || !(s->prefix & PREFIX_VEX)
4316                     || s->vex_l != 0) {
4317                     goto illegal_op;
4318                 }
4319                 ot = mo_64_32(s->dflag);
4320                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4321                 b = x86_ldub_code(env, s);
4322                 if (ot == MO_64) {
4323                     tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
4324                 } else {
4325                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4326                     tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
4327                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
4328                 }
4329                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4330                 break;
4331 
4332             default:
4333                 goto unknown_op;
4334             }
4335             break;
4336 
4337         default:
4338         unknown_op:
4339             gen_unknown_opcode(env, s);
4340             return;
4341         }
4342     } else {
4343         /* generic MMX or SSE operation */
4344         switch(b) {
4345         case 0x70: /* pshufx insn */
4346         case 0xc6: /* pshufx insn */
4347         case 0xc2: /* compare insns */
4348             s->rip_offset = 1;
4349             break;
4350         default:
4351             break;
4352         }
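         /* For scalar operations only the 32- or 64-bit operand is loaded
            into xmm_t0, so memory beyond the architectural operand size
            is never touched; everything else loads all 128 bits. */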
4353         if (is_xmm) {
4354             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4355             if (mod != 3) {
4356                 int sz = 4;
4357 
4358                 gen_lea_modrm(env, s, modrm);
4359                 op2_offset = offsetof(CPUX86State,xmm_t0);
4360 
4361                 switch (b) {
4362                 case 0x50 ... 0x5a:
4363                 case 0x5c ... 0x5f:
4364                 case 0xc2:
4365                     /* Most sse scalar operations.  */
4366                     if (b1 == 2) {
4367                         sz = 2;
4368                     } else if (b1 == 3) {
4369                         sz = 3;
4370                     }
4371                     break;
4372 
4373                 case 0x2e:  /* ucomis[sd] */
4374                 case 0x2f:  /* comis[sd] */
4375                     if (b1 == 0) {
4376                         sz = 2;
4377                     } else {
4378                         sz = 3;
4379                     }
4380                     break;
4381                 }
4382 
4383                 switch (sz) {
4384                 case 2:
4385                     /* 32 bit access */
4386                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
4387                     tcg_gen_st32_tl(s->T0, cpu_env,
4388                                     offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4389                     break;
4390                 case 3:
4391                     /* 64 bit access */
4392                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4393                     break;
4394                 default:
4395                     /* 128 bit access */
4396                     gen_ldo_env_A0(s, op2_offset);
4397                     break;
4398                 }
4399             } else {
4400                 rm = (modrm & 7) | REX_B(s);
4401                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4402             }
4403         } else {
4404             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4405             if (mod != 3) {
4406                 gen_lea_modrm(env, s, modrm);
4407                 op2_offset = offsetof(CPUX86State,mmx_t0);
4408                 gen_ldq_env_A0(s, op2_offset);
4409             } else {
4410                 rm = (modrm & 7);
4411                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4412             }
4413         }
4414         switch(b) {
4415         case 0x0f: /* 3DNow! data insns */
4416             val = x86_ldub_code(env, s);
4417             sse_fn_epp = sse_op_table5[val];
4418             if (!sse_fn_epp) {
4419                 goto unknown_op;
4420             }
4421             if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4422                 goto illegal_op;
4423             }
4424             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4425             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4426             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4427             break;
4428         case 0x70: /* pshufx insn */
4429         case 0xc6: /* pshufx insn */
4430             val = x86_ldub_code(env, s);
4431             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4432             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4433             /* XXX: introduce a new table? */
4434             sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4435             sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
4436             break;
4437         case 0xc2:
4438             /* compare insns */
4439             val = x86_ldub_code(env, s);
4440             if (val >= 8)
4441                 goto unknown_op;
4442             sse_fn_epp = sse_op_table4[val][b1];
4443 
4444             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4445             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4446             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4447             break;
4448         case 0xf7:
4449             /* maskmovq/maskmovdqu: the store goes to DS:rDI, so prepare A0 */
4450             if (mod != 3)
4451                 goto illegal_op;
4452             tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4453             gen_extu(s->aflag, s->A0);
4454             gen_add_A0_ds_seg(s);
4455 
4456             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4457             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4458             /* XXX: introduce a new table? */
4459             sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4460             sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
4461             break;
4462         default:
4463             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4464             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4465             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4466             break;
4467         }
4468         if (b == 0x2e || b == 0x2f) {
4469             set_cc_op(s, CC_OP_EFLAGS);
4470         }
4471     }
4472 }
4473 
4474 /* Convert one instruction.  s->base.is_jmp is set if the translation must
4475    be stopped.  Returns the next pc value.  */
4476 static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4477 {
4478     CPUX86State *env = cpu->env_ptr;
4479     int b, prefixes;
4480     int shift;
4481     MemOp ot, aflag, dflag;
4482     int modrm, reg, rm, mod, op, opreg, val;
4483     target_ulong next_eip, tval;
4484     int rex_w, rex_r;
4485     target_ulong pc_start = s->base.pc_next;
4486 
4487     s->pc_start = s->pc = pc_start;
4488     s->override = -1;
4489 #ifdef TARGET_X86_64
4490     s->rex_x = 0;
4491     s->rex_b = 0;
4492     s->x86_64_hregs = false;
4493 #endif
4494     s->rip_offset = 0; /* for relative ip address */
4495     s->vex_l = 0;
4496     s->vex_v = 0;
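    /*
     * x86_ldub_code() and friends siglongjmp back here if decoding runs
     * past the maximum instruction length; the over-long instruction
     * then raises #GP.
     */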
4497     if (sigsetjmp(s->jmpbuf, 0) != 0) {
4498         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
4499         return s->pc;
4500     }
4501 
4502     prefixes = 0;
4503     rex_w = -1;
4504     rex_r = 0;
4505 
4506  next_byte:
4507     b = x86_ldub_code(env, s);
4508     /* Collect prefixes.  */
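    /*
     * An x86 instruction is [legacy prefixes] [REX or VEX] opcode
     * [modrm] [sib] [disp] [imm].  Legacy prefixes may appear in any
     * order, so keep looping back to next_byte until a non-prefix
     * byte is found.
     */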
4509     switch (b) {
4510     case 0xf3:
4511         prefixes |= PREFIX_REPZ;
4512         goto next_byte;
4513     case 0xf2:
4514         prefixes |= PREFIX_REPNZ;
4515         goto next_byte;
4516     case 0xf0:
4517         prefixes |= PREFIX_LOCK;
4518         goto next_byte;
4519     case 0x2e:
4520         s->override = R_CS;
4521         goto next_byte;
4522     case 0x36:
4523         s->override = R_SS;
4524         goto next_byte;
4525     case 0x3e:
4526         s->override = R_DS;
4527         goto next_byte;
4528     case 0x26:
4529         s->override = R_ES;
4530         goto next_byte;
4531     case 0x64:
4532         s->override = R_FS;
4533         goto next_byte;
4534     case 0x65:
4535         s->override = R_GS;
4536         goto next_byte;
4537     case 0x66:
4538         prefixes |= PREFIX_DATA;
4539         goto next_byte;
4540     case 0x67:
4541         prefixes |= PREFIX_ADR;
4542         goto next_byte;
4543 #ifdef TARGET_X86_64
4544     case 0x40 ... 0x4f:
4545         if (CODE64(s)) {
4546             /* REX prefix */
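            /*
             * REX is 0100WRXB: W selects 64-bit operand size, while R, X
             * and B are shifted into bit position 3 so that they can be
             * OR'ed directly into the 3-bit reg, index and rm fields of
             * modrm/sib (e.g. 48 89 c7 is REX.W + mov %rax,%rdi).
             */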
4547             rex_w = (b >> 3) & 1;
4548             rex_r = (b & 0x4) << 1;
4549             s->rex_x = (b & 0x2) << 2;
4550             REX_B(s) = (b & 0x1) << 3;
4551             /* select uniform byte register addressing */
4552             s->x86_64_hregs = true;
4553             goto next_byte;
4554         }
4555         break;
4556 #endif
4557     case 0xc5: /* 2-byte VEX */
4558     case 0xc4: /* 3-byte VEX */
4559         /* VEX prefixes cannot be used except in 32-bit and 64-bit mode.
4560            Otherwise the instruction is LES or LDS.  */
4561         if (s->code32 && !s->vm86) {
4562             static const int pp_prefix[4] = {
4563                 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4564             };
4565             int vex3, vex2 = x86_ldub_code(env, s);
4566 
4567             if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4568                 /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4569                    otherwise the instruction is LES or LDS.  */
4570                 s->pc--; /* rewind the advance_pc() that x86_ldub_code() did */
4571                 break;
4572             }
4573 
4574             /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4575             if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4576                             | PREFIX_LOCK | PREFIX_DATA)) {
4577                 goto illegal_op;
4578             }
4579 #ifdef TARGET_X86_64
4580             if (s->x86_64_hregs) {
4581                 goto illegal_op;
4582             }
4583 #endif
4584             rex_r = (~vex2 >> 4) & 8;
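            /* VEX stores R, X, B and vvvv bit-inverted, hence the
               complement whenever they are extracted.  */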
4585             if (b == 0xc5) {
4586                 /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4587                 vex3 = vex2;
4588                 b = x86_ldub_code(env, s) | 0x100;
4589             } else {
4590                 /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4591 #ifdef TARGET_X86_64
4592                 s->rex_x = (~vex2 >> 3) & 8;
4593                 s->rex_b = (~vex2 >> 2) & 8;
4594 #endif
4595                 vex3 = x86_ldub_code(env, s);
4596                 rex_w = (vex3 >> 7) & 1;
4597                 switch (vex2 & 0x1f) {
4598                 case 0x01: /* Implied 0f leading opcode byte.  */
4599                     b = x86_ldub_code(env, s) | 0x100;
4600                     break;
4601                 case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4602                     b = 0x138;
4603                     break;
4604                 case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4605                     b = 0x13a;
4606                     break;
4607                 default:   /* Reserved for future use.  */
4608                     goto unknown_op;
4609                 }
4610             }
4611             s->vex_v = (~vex3 >> 3) & 0xf;
4612             s->vex_l = (vex3 >> 2) & 1;
4613             prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
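            /*
             * The 2-bit pp field encodes an implied 66/F3/F2 prefix;
             * merging it into 'prefixes' lets the opcode decoder treat
             * VEX and legacy encodings uniformly.
             */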
4614         }
4615         break;
4616     }
4617 
4618     /* Post-process prefixes.  */
4619     if (CODE64(s)) {
4620         /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4621            data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4622            over 0x66 if both are present.  */
4623         dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4624         /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4625         aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4626     } else {
4627         /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4628         if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
4629             dflag = MO_32;
4630         } else {
4631             dflag = MO_16;
4632         }
4633         /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4634         if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
4635             aflag = MO_32;
4636         } else {
4637             aflag = MO_16;
4638         }
4639     }
4640 
4641     s->prefix = prefixes;
4642     s->aflag = aflag;
4643     s->dflag = dflag;
4644 
4645     /* now check op code */
4646  reswitch:
4647     switch(b) {
4648     case 0x0f:
4649         /**************************/
4650         /* extended op code */
4651         b = x86_ldub_code(env, s) | 0x100;
4652         goto reswitch;
4653 
4654         /**************************/
4655         /* arith & logic */
4656     case 0x00 ... 0x05:
4657     case 0x08 ... 0x0d:
4658     case 0x10 ... 0x15:
4659     case 0x18 ... 0x1d:
4660     case 0x20 ... 0x25:
4661     case 0x28 ... 0x2d:
4662     case 0x30 ... 0x35:
4663     case 0x38 ... 0x3d:
4664         {
4665             int op, f, val;
4666             op = (b >> 3) & 7;
4667             f = (b >> 1) & 3;
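            /*
             * For the eight classic ALU ops, opcode bits 5:3 select the
             * operation (add/or/adc/sbb/and/sub/xor/cmp) and bits 2:0
             * the form: 0,1 = Ev,Gv; 2,3 = Gv,Ev; 4,5 = AL/eAX,imm,
             * with bit 0 picking byte vs. word/dword size.
             */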
4668 
4669             ot = mo_b_d(b, dflag);
4670 
4671             switch(f) {
4672             case 0: /* OP Ev, Gv */
4673                 modrm = x86_ldub_code(env, s);
4674                 reg = ((modrm >> 3) & 7) | rex_r;
4675                 mod = (modrm >> 6) & 3;
4676                 rm = (modrm & 7) | REX_B(s);
4677                 if (mod != 3) {
4678                     gen_lea_modrm(env, s, modrm);
4679                     opreg = OR_TMP0;
4680                 } else if (op == OP_XORL && rm == reg) {
4681                 xor_zero:
4682                     /* xor reg, reg optimisation */
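                    /*
                     * xor %reg,%reg is the canonical zero idiom: no
                     * operand needs to be read, and CC_OP_CLR yields the
                     * flags of a zero result (Z and P set, others clear).
                     */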
4683                     set_cc_op(s, CC_OP_CLR);
4684                     tcg_gen_movi_tl(s->T0, 0);
4685                     gen_op_mov_reg_v(s, ot, reg, s->T0);
4686                     break;
4687                 } else {
4688                     opreg = rm;
4689                 }
4690                 gen_op_mov_v_reg(s, ot, s->T1, reg);
4691                 gen_op(s, op, ot, opreg);
4692                 break;
4693             case 1: /* OP Gv, Ev */
4694                 modrm = x86_ldub_code(env, s);
4695                 mod = (modrm >> 6) & 3;
4696                 reg = ((modrm >> 3) & 7) | rex_r;
4697                 rm = (modrm & 7) | REX_B(s);
4698                 if (mod != 3) {
4699                     gen_lea_modrm(env, s, modrm);
4700                     gen_op_ld_v(s, ot, s->T1, s->A0);
4701                 } else if (op == OP_XORL && rm == reg) {
4702                     goto xor_zero;
4703                 } else {
4704                     gen_op_mov_v_reg(s, ot, s->T1, rm);
4705                 }
4706                 gen_op(s, op, ot, reg);
4707                 break;
4708             case 2: /* OP A, Iv */
4709                 val = insn_get(env, s, ot);
4710                 tcg_gen_movi_tl(s->T1, val);
4711                 gen_op(s, op, ot, OR_EAX);
4712                 break;
4713             }
4714         }
4715         break;
4716 
4717     case 0x82:
4718         if (CODE64(s))
4719             goto illegal_op;
4720         /* fall through */
4721     case 0x80: /* GRP1 */
4722     case 0x81:
4723     case 0x83:
4724         {
4725             int val;
4726 
4727             ot = mo_b_d(b, dflag);
4728 
4729             modrm = x86_ldub_code(env, s);
4730             mod = (modrm >> 6) & 3;
4731             rm = (modrm & 7) | REX_B(s);
4732             op = (modrm >> 3) & 7;
4733 
4734             if (mod != 3) {
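                /*
                 * rip_offset is the number of immediate bytes still to
                 * come after the modrm/displacement; RIP-relative
                 * addressing is relative to the end of the whole
                 * instruction, including that immediate.
                 */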
4735                 if (b == 0x83)
4736                     s->rip_offset = 1;
4737                 else
4738                     s->rip_offset = insn_const_size(ot);
4739                 gen_lea_modrm(env, s, modrm);
4740                 opreg = OR_TMP0;
4741             } else {
4742                 opreg = rm;
4743             }
4744 
4745             switch(b) {
4746             default:
4747             case 0x80:
4748             case 0x81:
4749             case 0x82:
4750                 val = insn_get(env, s, ot);
4751                 break;
4752             case 0x83:
4753                 val = (int8_t)insn_get(env, s, MO_8);
4754                 break;
4755             }
4756             tcg_gen_movi_tl(s->T1, val);
4757             gen_op(s, op, ot, opreg);
4758         }
4759         break;
4760 
4761         /**************************/
4762         /* inc, dec, and other misc arith */
4763     case 0x40 ... 0x47: /* inc Gv */
4764         ot = dflag;
4765         gen_inc(s, ot, OR_EAX + (b & 7), 1);
4766         break;
4767     case 0x48 ... 0x4f: /* dec Gv */
4768         ot = dflag;
4769         gen_inc(s, ot, OR_EAX + (b & 7), -1);
4770         break;
4771     case 0xf6: /* GRP3 */
4772     case 0xf7:
4773         ot = mo_b_d(b, dflag);
4774 
4775         modrm = x86_ldub_code(env, s);
4776         mod = (modrm >> 6) & 3;
4777         rm = (modrm & 7) | REX_B(s);
4778         op = (modrm >> 3) & 7;
4779         if (mod != 3) {
4780             if (op == 0) {
4781                 s->rip_offset = insn_const_size(ot);
4782             }
4783             gen_lea_modrm(env, s, modrm);
4784             /* For those below that handle locked memory, don't load here.  */
4785             if (!(s->prefix & PREFIX_LOCK)
4786                 || op != 2) {
4787                 gen_op_ld_v(s, ot, s->T0, s->A0);
4788             }
4789         } else {
4790             gen_op_mov_v_reg(s, ot, s->T0, rm);
4791         }
4792 
4793         switch(op) {
4794         case 0: /* test */
4795             val = insn_get(env, s, ot);
4796             tcg_gen_movi_tl(s->T1, val);
4797             gen_op_testl_T0_T1_cc(s);
4798             set_cc_op(s, CC_OP_LOGICB + ot);
4799             break;
4800         case 2: /* not */
4801             if (s->prefix & PREFIX_LOCK) {
4802                 if (mod == 3) {
4803                     goto illegal_op;
4804                 }
4805                 tcg_gen_movi_tl(s->T0, ~0);
4806                 tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
4807                                             s->mem_index, ot | MO_LE);
4808             } else {
4809                 tcg_gen_not_tl(s->T0, s->T0);
4810                 if (mod != 3) {
4811                     gen_op_st_v(s, ot, s->T0, s->A0);
4812                 } else {
4813                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4814                 }
4815             }
4816             break;
4817         case 3: /* neg */
4818             if (s->prefix & PREFIX_LOCK) {
4819                 TCGLabel *label1;
4820                 TCGv a0, t0, t1, t2;
4821 
4822                 if (mod == 3) {
4823                     goto illegal_op;
4824                 }
4825                 a0 = tcg_temp_local_new();
4826                 t0 = tcg_temp_local_new();
4827                 label1 = gen_new_label();
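                /*
                 * Locked NEG is emulated as a compare-and-swap loop:
                 * take the previously loaded value as a guess, try to
                 * replace it with its negation, and retry from label1
                 * with the value cmpxchg returned if another CPU wrote
                 * the location in the meantime.
                 */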
4828 
4829                 tcg_gen_mov_tl(a0, s->A0);
4830                 tcg_gen_mov_tl(t0, s->T0);
4831 
4832                 gen_set_label(label1);
4833                 t1 = tcg_temp_new();
4834                 t2 = tcg_temp_new();
4835                 tcg_gen_mov_tl(t2, t0);
4836                 tcg_gen_neg_tl(t1, t0);
4837                 tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4838                                           s->mem_index, ot | MO_LE);
4839                 tcg_temp_free(t1);
4840                 tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4841 
4842                 tcg_temp_free(t2);
4843                 tcg_temp_free(a0);
4844                 tcg_gen_mov_tl(s->T0, t0);
4845                 tcg_temp_free(t0);
4846             } else {
4847                 tcg_gen_neg_tl(s->T0, s->T0);
4848                 if (mod != 3) {
4849                     gen_op_st_v(s, ot, s->T0, s->A0);
4850                 } else {
4851                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4852                 }
4853             }
4854             gen_op_update_neg_cc(s);
4855             set_cc_op(s, CC_OP_SUBB + ot);
4856             break;
4857         case 4: /* mul */
4858             switch(ot) {
4859             case MO_8:
4860                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4861                 tcg_gen_ext8u_tl(s->T0, s->T0);
4862                 tcg_gen_ext8u_tl(s->T1, s->T1);
4863                 /* XXX: use 32 bit mul which could be faster */
4864                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4865                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4866                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4867                 tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
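                /* For MUL, CF and OF are set iff the high half of the
                   product is nonzero; the masked high byte in cc_src is
                   what CC_OP_MULB tests.  */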
4868                 set_cc_op(s, CC_OP_MULB);
4869                 break;
4870             case MO_16:
4871                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4872                 tcg_gen_ext16u_tl(s->T0, s->T0);
4873                 tcg_gen_ext16u_tl(s->T1, s->T1);
4874                 /* XXX: use 32 bit mul which could be faster */
4875                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4876                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4877                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4878                 tcg_gen_shri_tl(s->T0, s->T0, 16);
4879                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4880                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
4881                 set_cc_op(s, CC_OP_MULW);
4882                 break;
4883             default:
4884             case MO_32:
4885                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4886                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4887                 tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4888                                   s->tmp2_i32, s->tmp3_i32);
4889                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4890                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4891                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4892                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4893                 set_cc_op(s, CC_OP_MULL);
4894                 break;
4895 #ifdef TARGET_X86_64
4896             case MO_64:
4897                 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4898                                   s->T0, cpu_regs[R_EAX]);
4899                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4900                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4901                 set_cc_op(s, CC_OP_MULQ);
4902                 break;
4903 #endif
4904             }
4905             break;
4906         case 5: /* imul */
4907             switch(ot) {
4908             case MO_8:
4909                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4910                 tcg_gen_ext8s_tl(s->T0, s->T0);
4911                 tcg_gen_ext8s_tl(s->T1, s->T1);
4912                 /* XXX: use 32 bit mul which could be faster */
4913                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4914                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4915                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4916                 tcg_gen_ext8s_tl(s->tmp0, s->T0);
4917                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
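                /* For IMUL, CF and OF are set iff the product overflows
                   the destination: subtracting the sign-extended low
                   byte leaves cc_src nonzero exactly on overflow.  */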
4918                 set_cc_op(s, CC_OP_MULB);
4919                 break;
4920             case MO_16:
4921                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4922                 tcg_gen_ext16s_tl(s->T0, s->T0);
4923                 tcg_gen_ext16s_tl(s->T1, s->T1);
4924                 /* XXX: use 32 bit mul which could be faster */
4925                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4926                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4927                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4928                 tcg_gen_ext16s_tl(s->tmp0, s->T0);
4929                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4930                 tcg_gen_shri_tl(s->T0, s->T0, 16);
4931                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4932                 set_cc_op(s, CC_OP_MULW);
4933                 break;
4934             default:
4935             case MO_32:
4936                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4937                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4938                 tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
4939                                   s->tmp2_i32, s->tmp3_i32);
4940                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4941                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4942                 tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
4943                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4944                 tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
4945                 tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
4946                 set_cc_op(s, CC_OP_MULL);
4947                 break;
4948 #ifdef TARGET_X86_64
4949             case MO_64:
4950                 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4951                                   s->T0, cpu_regs[R_EAX]);
4952                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4953                 tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
4954                 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
4955                 set_cc_op(s, CC_OP_MULQ);
4956                 break;
4957 #endif
4958             }
4959             break;
4960         case 6: /* div */
4961             switch(ot) {
4962             case MO_8:
4963                 gen_helper_divb_AL(cpu_env, s->T0);
4964                 break;
4965             case MO_16:
4966                 gen_helper_divw_AX(cpu_env, s->T0);
4967                 break;
4968             default:
4969             case MO_32:
4970                 gen_helper_divl_EAX(cpu_env, s->T0);
4971                 break;
4972 #ifdef TARGET_X86_64
4973             case MO_64:
4974                 gen_helper_divq_EAX(cpu_env, s->T0);
4975                 break;
4976 #endif
4977             }
4978             break;
4979         case 7: /* idiv */
4980             switch(ot) {
4981             case MO_8:
4982                 gen_helper_idivb_AL(cpu_env, s->T0);
4983                 break;
4984             case MO_16:
4985                 gen_helper_idivw_AX(cpu_env, s->T0);
4986                 break;
4987             default:
4988             case MO_32:
4989                 gen_helper_idivl_EAX(cpu_env, s->T0);
4990                 break;
4991 #ifdef TARGET_X86_64
4992             case MO_64:
4993                 gen_helper_idivq_EAX(cpu_env, s->T0);
4994                 break;
4995 #endif
4996             }
4997             break;
4998         default:
4999             goto unknown_op;
5000         }
5001         break;
5002 
5003     case 0xfe: /* GRP4 */
5004     case 0xff: /* GRP5 */
5005         ot = mo_b_d(b, dflag);
5006 
5007         modrm = x86_ldub_code(env, s);
5008         mod = (modrm >> 6) & 3;
5009         rm = (modrm & 7) | REX_B(s);
5010         op = (modrm >> 3) & 7;
5011         if (op >= 2 && b == 0xfe) {
5012             goto unknown_op;
5013         }
5014         if (CODE64(s)) {
5015             if (op == 2 || op == 4) {
5016                 /* operand size for jumps is 64 bit */
5017                 ot = MO_64;
5018             } else if (op == 3 || op == 5) {
5019                 ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
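                /* MO_32 + 1 == MO_64, so rex_w promotes these to 64-bit.  */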
5020             } else if (op == 6) {
5021                 /* default push size is 64 bit */
5022                 ot = mo_pushpop(s, dflag);
5023             }
5024         }
5025         if (mod != 3) {
5026             gen_lea_modrm(env, s, modrm);
5027             if (op >= 2 && op != 3 && op != 5)
5028                 gen_op_ld_v(s, ot, s->T0, s->A0);
5029         } else {
5030             gen_op_mov_v_reg(s, ot, s->T0, rm);
5031         }
5032 
5033         switch(op) {
5034         case 0: /* inc Ev */
5035             if (mod != 3)
5036                 opreg = OR_TMP0;
5037             else
5038                 opreg = rm;
5039             gen_inc(s, ot, opreg, 1);
5040             break;
5041         case 1: /* dec Ev */
5042             if (mod != 3)
5043                 opreg = OR_TMP0;
5044             else
5045                 opreg = rm;
5046             gen_inc(s, ot, opreg, -1);
5047             break;
5048         case 2: /* call Ev */
5049             /* XXX: the 'and' is unnecessary if the operand came from memory (already zero-extended) */
5050             if (dflag == MO_16) {
5051                 tcg_gen_ext16u_tl(s->T0, s->T0);
5052             }
5053             next_eip = s->pc - s->cs_base;
5054             tcg_gen_movi_tl(s->T1, next_eip);
5055             gen_push_v(s, s->T1);
5056             gen_op_jmp_v(s->T0);
5057             gen_bnd_jmp(s);
5058             gen_jr(s, s->T0);
5059             break;
5060         case 3: /* lcall Ev */
5061             gen_op_ld_v(s, ot, s->T1, s->A0);
5062             gen_add_A0_im(s, 1 << ot);
5063             gen_op_ld_v(s, MO_16, s->T0, s->A0);
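            /* At this point T1 holds the new EIP and T0 the new CS
               selector.  */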
5064         do_lcall:
5065             if (s->pe && !s->vm86) {
5066                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5067                 gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
5068                                            tcg_const_i32(dflag - 1),
5069                                            tcg_const_tl(s->pc - s->cs_base));
5070             } else {
5071                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5072                 gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
5073                                       tcg_const_i32(dflag - 1),
5074                                       tcg_const_i32(s->pc - s->cs_base));
5075             }
5076             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5077             gen_jr(s, s->tmp4);
5078             break;
5079         case 4: /* jmp Ev */
5080             if (dflag == MO_16) {
5081                 tcg_gen_ext16u_tl(s->T0, s->T0);
5082             }
5083             gen_op_jmp_v(s->T0);
5084             gen_bnd_jmp(s);
5085             gen_jr(s, s->T0);
5086             break;
5087         case 5: /* ljmp Ev */
5088             gen_op_ld_v(s, ot, s->T1, s->A0);
5089             gen_add_A0_im(s, 1 << ot);
5090             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5091         do_ljmp:
5092             if (s->pe && !s->vm86) {
5093                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5094                 gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
5095                                           tcg_const_tl(s->pc - s->cs_base));
5096             } else {
5097                 gen_op_movl_seg_T0_vm(s, R_CS);
5098                 gen_op_jmp_v(s->T1);
5099             }
5100             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5101             gen_jr(s, s->tmp4);
5102             break;
5103         case 6: /* push Ev */
5104             gen_push_v(s, s->T0);
5105             break;
5106         default:
5107             goto unknown_op;
5108         }
5109         break;
5110 
5111     case 0x84: /* test Ev, Gv */
5112     case 0x85:
5113         ot = mo_b_d(b, dflag);
5114 
5115         modrm = x86_ldub_code(env, s);
5116         reg = ((modrm >> 3) & 7) | rex_r;
5117 
5118         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5119         gen_op_mov_v_reg(s, ot, s->T1, reg);
5120         gen_op_testl_T0_T1_cc(s);
5121         set_cc_op(s, CC_OP_LOGICB + ot);
5122         break;
5123 
5124     case 0xa8: /* test eAX, Iv */
5125     case 0xa9:
5126         ot = mo_b_d(b, dflag);
5127         val = insn_get(env, s, ot);
5128 
5129         gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
5130         tcg_gen_movi_tl(s->T1, val);
5131         gen_op_testl_T0_T1_cc(s);
5132         set_cc_op(s, CC_OP_LOGICB + ot);
5133         break;
5134 
5135     case 0x98: /* CWDE/CBW */
5136         switch (dflag) {
5137 #ifdef TARGET_X86_64
5138         case MO_64:
5139             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5140             tcg_gen_ext32s_tl(s->T0, s->T0);
5141             gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
5142             break;
5143 #endif
5144         case MO_32:
5145             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5146             tcg_gen_ext16s_tl(s->T0, s->T0);
5147             gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
5148             break;
5149         case MO_16:
5150             gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
5151             tcg_gen_ext8s_tl(s->T0, s->T0);
5152             gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5153             break;
5154         default:
5155             tcg_abort();
5156         }
5157         break;
5158     case 0x99: /* CDQ/CWD */
5159         switch (dflag) {
5160 #ifdef TARGET_X86_64
5161         case MO_64:
5162             gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
5163             tcg_gen_sari_tl(s->T0, s->T0, 63);
5164             gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
5165             break;
5166 #endif
5167         case MO_32:
5168             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5169             tcg_gen_ext32s_tl(s->T0, s->T0);
5170             tcg_gen_sari_tl(s->T0, s->T0, 31);
5171             gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
5172             break;
5173         case MO_16:
5174             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5175             tcg_gen_ext16s_tl(s->T0, s->T0);
5176             tcg_gen_sari_tl(s->T0, s->T0, 15);
5177             gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5178             break;
5179         default:
5180             tcg_abort();
5181         }
5182         break;
5183     case 0x1af: /* imul Gv, Ev */
5184     case 0x69: /* imul Gv, Ev, I */
5185     case 0x6b:
5186         ot = dflag;
5187         modrm = x86_ldub_code(env, s);
5188         reg = ((modrm >> 3) & 7) | rex_r;
5189         if (b == 0x69)
5190             s->rip_offset = insn_const_size(ot);
5191         else if (b == 0x6b)
5192             s->rip_offset = 1;
5193         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5194         if (b == 0x69) {
5195             val = insn_get(env, s, ot);
5196             tcg_gen_movi_tl(s->T1, val);
5197         } else if (b == 0x6b) {
5198             val = (int8_t)insn_get(env, s, MO_8);
5199             tcg_gen_movi_tl(s->T1, val);
5200         } else {
5201             gen_op_mov_v_reg(s, ot, s->T1, reg);
5202         }
5203         switch (ot) {
5204 #ifdef TARGET_X86_64
5205         case MO_64:
5206             tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
5207             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5208             tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5209             tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
5210             break;
5211 #endif
5212         case MO_32:
5213             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5214             tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
5215             tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5216                               s->tmp2_i32, s->tmp3_i32);
5217             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
5218             tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5219             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5220             tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5221             tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5222             break;
5223         default:
5224             tcg_gen_ext16s_tl(s->T0, s->T0);
5225             tcg_gen_ext16s_tl(s->T1, s->T1);
5226             /* XXX: use 32 bit mul which could be faster */
5227             tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5228             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5229             tcg_gen_ext16s_tl(s->tmp0, s->T0);
5230             tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5231             gen_op_mov_reg_v(s, ot, reg, s->T0);
5232             break;
5233         }
5234         set_cc_op(s, CC_OP_MULB + ot);
5235         break;
5236     case 0x1c0:
5237     case 0x1c1: /* xadd Ev, Gv */
5238         ot = mo_b_d(b, dflag);
5239         modrm = x86_ldub_code(env, s);
5240         reg = ((modrm >> 3) & 7) | rex_r;
5241         mod = (modrm >> 6) & 3;
5242         gen_op_mov_v_reg(s, ot, s->T0, reg);
5243         if (mod == 3) {
5244             rm = (modrm & 7) | REX_B(s);
5245             gen_op_mov_v_reg(s, ot, s->T1, rm);
5246             tcg_gen_add_tl(s->T0, s->T0, s->T1);
5247             gen_op_mov_reg_v(s, ot, reg, s->T1);
5248             gen_op_mov_reg_v(s, ot, rm, s->T0);
5249         } else {
5250             gen_lea_modrm(env, s, modrm);
5251             if (s->prefix & PREFIX_LOCK) {
5252                 tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
5253                                             s->mem_index, ot | MO_LE);
5254                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5255             } else {
5256                 gen_op_ld_v(s, ot, s->T1, s->A0);
5257                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5258                 gen_op_st_v(s, ot, s->T0, s->A0);
5259             }
5260             gen_op_mov_reg_v(s, ot, reg, s->T1);
5261         }
5262         gen_op_update2_cc(s);
5263         set_cc_op(s, CC_OP_ADDB + ot);
5264         break;
5265     case 0x1b0:
5266     case 0x1b1: /* cmpxchg Ev, Gv */
5267         {
5268             TCGv oldv, newv, cmpv;
5269 
5270             ot = mo_b_d(b, dflag);
5271             modrm = x86_ldub_code(env, s);
5272             reg = ((modrm >> 3) & 7) | rex_r;
5273             mod = (modrm >> 6) & 3;
5274             oldv = tcg_temp_new();
5275             newv = tcg_temp_new();
5276             cmpv = tcg_temp_new();
5277             gen_op_mov_v_reg(s, ot, newv, reg);
5278             tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5279 
5280             if (s->prefix & PREFIX_LOCK) {
5281                 if (mod == 3) {
5282                     goto illegal_op;
5283                 }
5284                 gen_lea_modrm(env, s, modrm);
5285                 tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
5286                                           s->mem_index, ot | MO_LE);
5287                 gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5288             } else {
5289                 if (mod == 3) {
5290                     rm = (modrm & 7) | REX_B(s);
5291                     gen_op_mov_v_reg(s, ot, oldv, rm);
5292                 } else {
5293                     gen_lea_modrm(env, s, modrm);
5294                     gen_op_ld_v(s, ot, oldv, s->A0);
5295                     rm = 0; /* avoid warning */
5296                 }
5297                 gen_extu(ot, oldv);
5298                 gen_extu(ot, cmpv);
5299                 /* store value = (old == cmp ? new : old);  */
5300                 tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5301                 if (mod == 3) {
5302                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5303                     gen_op_mov_reg_v(s, ot, rm, newv);
5304                 } else {
5305                     /* Perform an unconditional store cycle like a physical
5306                        CPU; it must happen before the accumulator changes,
5307                        to ensure idempotency if the store faults and the
5308                        instruction is restarted */
5309                     gen_op_st_v(s, ot, newv, s->A0);
5310                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5311                 }
5312             }
5313             tcg_gen_mov_tl(cpu_cc_src, oldv);
5314             tcg_gen_mov_tl(s->cc_srcT, cmpv);
5315             tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5316             set_cc_op(s, CC_OP_SUBB + ot);
5317             tcg_temp_free(oldv);
5318             tcg_temp_free(newv);
5319             tcg_temp_free(cmpv);
5320         }
5321         break;
5322     case 0x1c7: /* cmpxchg8b */
5323         modrm = x86_ldub_code(env, s);
5324         mod = (modrm >> 6) & 3;
5325         switch ((modrm >> 3) & 7) {
5326         case 1: /* CMPXCHG8B, CMPXCHG16B */
5327             if (mod == 3) {
5328                 goto illegal_op;
5329             }
5330 #ifdef TARGET_X86_64
5331             if (dflag == MO_64) {
5332                 if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
5333                     goto illegal_op;
5334                 }
5335                 gen_lea_modrm(env, s, modrm);
5336                 if ((s->prefix & PREFIX_LOCK) &&
5337                     (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5338                     gen_helper_cmpxchg16b(cpu_env, s->A0);
5339                 } else {
5340                     gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
5341                 }
5342                 set_cc_op(s, CC_OP_EFLAGS);
5343                 break;
5344             }
5345 #endif
5346             if (!(s->cpuid_features & CPUID_CX8)) {
5347                 goto illegal_op;
5348             }
5349             gen_lea_modrm(env, s, modrm);
5350             if ((s->prefix & PREFIX_LOCK) &&
5351                 (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5352                 gen_helper_cmpxchg8b(cpu_env, s->A0);
5353             } else {
5354                 gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
5355             }
5356             set_cc_op(s, CC_OP_EFLAGS);
5357             break;
5358 
5359         case 7: /* RDSEED */
5360         case 6: /* RDRAND */
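            /* Note that RDSEED shares the RDRAND helper and CPUID check
               here.  */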
5361             if (mod != 3 ||
5362                 (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
5363                 !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) {
5364                 goto illegal_op;
5365             }
5366             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5367                 gen_io_start();
5368             }
5369             gen_helper_rdrand(s->T0, cpu_env);
5370             rm = (modrm & 7) | REX_B(s);
5371             gen_op_mov_reg_v(s, dflag, rm, s->T0);
5372             set_cc_op(s, CC_OP_EFLAGS);
5373             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5374                 gen_jmp(s, s->pc - s->cs_base);
5375             }
5376             break;
5377 
5378         default:
5379             goto illegal_op;
5380         }
5381         break;
5382 
5383         /**************************/
5384         /* push/pop */
5385     case 0x50 ... 0x57: /* push */
5386         gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
5387         gen_push_v(s, s->T0);
5388         break;
5389     case 0x58 ... 0x5f: /* pop */
5390         ot = gen_pop_T0(s);
5391         /* NOTE: order is important for pop %sp: ESP must be updated before the register write */
5392         gen_pop_update(s, ot);
5393         gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
5394         break;
5395     case 0x60: /* pusha */
5396         if (CODE64(s))
5397             goto illegal_op;
5398         gen_pusha(s);
5399         break;
5400     case 0x61: /* popa */
5401         if (CODE64(s))
5402             goto illegal_op;
5403         gen_popa(s);
5404         break;
5405     case 0x68: /* push Iv */
5406     case 0x6a:
5407         ot = mo_pushpop(s, dflag);
5408         if (b == 0x68)
5409             val = insn_get(env, s, ot);
5410         else
5411             val = (int8_t)insn_get(env, s, MO_8);
5412         tcg_gen_movi_tl(s->T0, val);
5413         gen_push_v(s, s->T0);
5414         break;
5415     case 0x8f: /* pop Ev */
5416         modrm = x86_ldub_code(env, s);
5417         mod = (modrm >> 6) & 3;
5418         ot = gen_pop_T0(s);
5419         if (mod == 3) {
5420             /* NOTE: order is important for pop %sp */
5421             gen_pop_update(s, ot);
5422             rm = (modrm & 7) | REX_B(s);
5423             gen_op_mov_reg_v(s, ot, rm, s->T0);
5424         } else {
5425             /* NOTE: order is important too for MMU exceptions */
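            /*
             * popl_esp_hack biases ESP-based address computation by the
             * operand size, since "pop Ev" computes the effective
             * address with the value ESP has after the pop.
             */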
5426             s->popl_esp_hack = 1 << ot;
5427             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5428             s->popl_esp_hack = 0;
5429             gen_pop_update(s, ot);
5430         }
5431         break;
5432     case 0xc8: /* enter */
5433         {
5434             int level;
5435             val = x86_lduw_code(env, s);
5436             level = x86_ldub_code(env, s);
5437             gen_enter(s, val, level);
5438         }
5439         break;
5440     case 0xc9: /* leave */
5441         gen_leave(s);
5442         break;
5443     case 0x06: /* push es */
5444     case 0x0e: /* push cs */
5445     case 0x16: /* push ss */
5446     case 0x1e: /* push ds */
5447         if (CODE64(s))
5448             goto illegal_op;
5449         gen_op_movl_T0_seg(s, b >> 3);
5450         gen_push_v(s, s->T0);
5451         break;
5452     case 0x1a0: /* push fs */
5453     case 0x1a8: /* push gs */
5454         gen_op_movl_T0_seg(s, (b >> 3) & 7);
5455         gen_push_v(s, s->T0);
5456         break;
5457     case 0x07: /* pop es */
5458     case 0x17: /* pop ss */
5459     case 0x1f: /* pop ds */
5460         if (CODE64(s))
5461             goto illegal_op;
5462         reg = b >> 3;
5463         ot = gen_pop_T0(s);
5464         gen_movl_seg_T0(s, reg);
5465         gen_pop_update(s, ot);
5466         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5467         if (s->base.is_jmp) {
5468             gen_jmp_im(s, s->pc - s->cs_base);
5469             if (reg == R_SS) {
5470                 s->tf = 0;
5471                 gen_eob_inhibit_irq(s, true);
5472             } else {
5473                 gen_eob(s);
5474             }
5475         }
5476         break;
5477     case 0x1a1: /* pop fs */
5478     case 0x1a9: /* pop gs */
5479         ot = gen_pop_T0(s);
5480         gen_movl_seg_T0(s, (b >> 3) & 7);
5481         gen_pop_update(s, ot);
5482         if (s->base.is_jmp) {
5483             gen_jmp_im(s, s->pc - s->cs_base);
5484             gen_eob(s);
5485         }
5486         break;
5487 
5488         /**************************/
5489         /* mov */
5490     case 0x88:
5491     case 0x89: /* mov Gv, Ev */
5492         ot = mo_b_d(b, dflag);
5493         modrm = x86_ldub_code(env, s);
5494         reg = ((modrm >> 3) & 7) | rex_r;
5495 
5496         /* generate a generic store */
5497         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5498         break;
5499     case 0xc6:
5500     case 0xc7: /* mov Ev, Iv */
5501         ot = mo_b_d(b, dflag);
5502         modrm = x86_ldub_code(env, s);
5503         mod = (modrm >> 6) & 3;
5504         if (mod != 3) {
5505             s->rip_offset = insn_const_size(ot);
5506             gen_lea_modrm(env, s, modrm);
5507         }
5508         val = insn_get(env, s, ot);
5509         tcg_gen_movi_tl(s->T0, val);
5510         if (mod != 3) {
5511             gen_op_st_v(s, ot, s->T0, s->A0);
5512         } else {
5513             gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
5514         }
5515         break;
5516     case 0x8a:
5517     case 0x8b: /* mov Ev, Gv */
5518         ot = mo_b_d(b, dflag);
5519         modrm = x86_ldub_code(env, s);
5520         reg = ((modrm >> 3) & 7) | rex_r;
5521 
5522         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5523         gen_op_mov_reg_v(s, ot, reg, s->T0);
5524         break;
5525     case 0x8e: /* mov seg, Gv */
5526         modrm = x86_ldub_code(env, s);
5527         reg = (modrm >> 3) & 7;
5528         if (reg >= 6 || reg == R_CS)
5529             goto illegal_op;
5530         gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5531         gen_movl_seg_T0(s, reg);
5532         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5533         if (s->base.is_jmp) {
5534             gen_jmp_im(s, s->pc - s->cs_base);
5535             if (reg == R_SS) {
5536                 s->tf = 0;
5537                 gen_eob_inhibit_irq(s, true);
5538             } else {
5539                 gen_eob(s);
5540             }
5541         }
5542         break;
5543     case 0x8c: /* mov Gv, seg */
5544         modrm = x86_ldub_code(env, s);
5545         reg = (modrm >> 3) & 7;
5546         mod = (modrm >> 6) & 3;
5547         if (reg >= 6)
5548             goto illegal_op;
5549         gen_op_movl_T0_seg(s, reg);
5550         ot = mod == 3 ? dflag : MO_16;
5551         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5552         break;
5553 
5554     case 0x1b6: /* movzbS Gv, Eb */
5555     case 0x1b7: /* movzwS Gv, Eb */
5556     case 0x1be: /* movsbS Gv, Eb */
5557     case 0x1bf: /* movswS Gv, Eb */
5558         {
5559             MemOp d_ot;
5560             MemOp s_ot;
5561 
5562             /* d_ot is the size of destination */
5563             d_ot = dflag;
5564             /* ot is the size of source */
5565             ot = (b & 1) + MO_8;
5566             /* s_ot is the sign+size of source */
5567             s_ot = b & 8 ? MO_SIGN | ot : ot;
5568 
5569             modrm = x86_ldub_code(env, s);
5570             reg = ((modrm >> 3) & 7) | rex_r;
5571             mod = (modrm >> 6) & 3;
5572             rm = (modrm & 7) | REX_B(s);
5573 
5574             if (mod == 3) {
5575                 if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
5576                     tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
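                    /* Without REX, regs 4-7 of a byte op are AH/CH/DH/BH,
                       i.e. bits 15:8 of regs 0-3; sextract reads the high
                       byte and sign-extends it in one step.  */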
5577                 } else {
5578                     gen_op_mov_v_reg(s, ot, s->T0, rm);
5579                     switch (s_ot) {
5580                     case MO_UB:
5581                         tcg_gen_ext8u_tl(s->T0, s->T0);
5582                         break;
5583                     case MO_SB:
5584                         tcg_gen_ext8s_tl(s->T0, s->T0);
5585                         break;
5586                     case MO_UW:
5587                         tcg_gen_ext16u_tl(s->T0, s->T0);
5588                         break;
5589                     default:
5590                     case MO_SW:
5591                         tcg_gen_ext16s_tl(s->T0, s->T0);
5592                         break;
5593                     }
5594                 }
5595                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5596             } else {
5597                 gen_lea_modrm(env, s, modrm);
5598                 gen_op_ld_v(s, s_ot, s->T0, s->A0);
5599                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5600             }
5601         }
5602         break;
5603 
5604     case 0x8d: /* lea */
5605         modrm = x86_ldub_code(env, s);
5606         mod = (modrm >> 6) & 3;
5607         if (mod == 3)
5608             goto illegal_op;
5609         reg = ((modrm >> 3) & 7) | rex_r;
5610         {
5611             AddressParts a = gen_lea_modrm_0(env, s, modrm);
5612             TCGv ea = gen_lea_modrm_1(s, a);
5613             gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5614             gen_op_mov_reg_v(s, dflag, reg, s->A0);
5615         }
5616         break;
5617 
5618     case 0xa0: /* mov EAX, Ov */
5619     case 0xa1:
5620     case 0xa2: /* mov Ov, EAX */
5621     case 0xa3:
5622         {
5623             target_ulong offset_addr;
5624 
5625             ot = mo_b_d(b, dflag);
5626             switch (s->aflag) {
5627 #ifdef TARGET_X86_64
5628             case MO_64:
5629                 offset_addr = x86_ldq_code(env, s);
5630                 break;
5631 #endif
5632             default:
5633                 offset_addr = insn_get(env, s, s->aflag);
5634                 break;
5635             }
5636             tcg_gen_movi_tl(s->A0, offset_addr);
5637             gen_add_A0_ds_seg(s);
5638             if ((b & 2) == 0) {
5639                 gen_op_ld_v(s, ot, s->T0, s->A0);
5640                 gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
5641             } else {
5642                 gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
5643                 gen_op_st_v(s, ot, s->T0, s->A0);
5644             }
5645         }
5646         break;
5647     case 0xd7: /* xlat */
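        /* AL = [seg:rBX + zero-extended AL]  */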
5648         tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
5649         tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
5650         tcg_gen_add_tl(s->A0, s->A0, s->T0);
5651         gen_extu(s->aflag, s->A0);
5652         gen_add_A0_ds_seg(s);
5653         gen_op_ld_v(s, MO_8, s->T0, s->A0);
5654         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
5655         break;
5656     case 0xb0 ... 0xb7: /* mov R, Ib */
5657         val = insn_get(env, s, MO_8);
5658         tcg_gen_movi_tl(s->T0, val);
5659         gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
5660         break;
5661     case 0xb8 ... 0xbf: /* mov R, Iv */
5662 #ifdef TARGET_X86_64
5663         if (dflag == MO_64) {
5664             uint64_t tmp;
5665             /* 64 bit case */
5666             tmp = x86_ldq_code(env, s);
5667             reg = (b & 7) | REX_B(s);
5668             tcg_gen_movi_tl(s->T0, tmp);
5669             gen_op_mov_reg_v(s, MO_64, reg, s->T0);
5670         } else
5671 #endif
5672         {
5673             ot = dflag;
5674             val = insn_get(env, s, ot);
5675             reg = (b & 7) | REX_B(s);
5676             tcg_gen_movi_tl(s->T0, val);
5677             gen_op_mov_reg_v(s, ot, reg, s->T0);
5678         }
5679         break;
5680 
5681     case 0x91 ... 0x97: /* xchg R, EAX */
5682     do_xchg_reg_eax:
5683         ot = dflag;
5684         reg = (b & 7) | REX_B(s);
5685         rm = R_EAX;
5686         goto do_xchg_reg;
5687     case 0x86:
5688     case 0x87: /* xchg Ev, Gv */
5689         ot = mo_b_d(b, dflag);
5690         modrm = x86_ldub_code(env, s);
5691         reg = ((modrm >> 3) & 7) | rex_r;
5692         mod = (modrm >> 6) & 3;
5693         if (mod == 3) {
5694             rm = (modrm & 7) | REX_B(s);
5695         do_xchg_reg:
5696             gen_op_mov_v_reg(s, ot, s->T0, reg);
5697             gen_op_mov_v_reg(s, ot, s->T1, rm);
5698             gen_op_mov_reg_v(s, ot, rm, s->T0);
5699             gen_op_mov_reg_v(s, ot, reg, s->T1);
5700         } else {
5701             gen_lea_modrm(env, s, modrm);
5702             gen_op_mov_v_reg(s, ot, s->T0, reg);
5703             /* for xchg, lock is implicit */
5704             tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
5705                                    s->mem_index, ot | MO_LE);
5706             gen_op_mov_reg_v(s, ot, reg, s->T1);
5707         }
5708         break;
5709     case 0xc4: /* les Gv */
5710         /* In CODE64 this is VEX3; see above.  */
5711         op = R_ES;
5712         goto do_lxx;
5713     case 0xc5: /* lds Gv */
5714         /* In CODE64 this is VEX2; see above.  */
5715         op = R_DS;
5716         goto do_lxx;
5717     case 0x1b2: /* lss Gv */
5718         op = R_SS;
5719         goto do_lxx;
5720     case 0x1b4: /* lfs Gv */
5721         op = R_FS;
5722         goto do_lxx;
5723     case 0x1b5: /* lgs Gv */
5724         op = R_GS;
5725     do_lxx:
5726         ot = dflag != MO_16 ? MO_32 : MO_16;
5727         modrm = x86_ldub_code(env, s);
5728         reg = ((modrm >> 3) & 7) | rex_r;
5729         mod = (modrm >> 6) & 3;
5730         if (mod == 3)
5731             goto illegal_op;
5732         gen_lea_modrm(env, s, modrm);
5733         gen_op_ld_v(s, ot, s->T1, s->A0);
5734         gen_add_A0_im(s, 1 << ot);
5735         /* load the segment first to handle exceptions properly */
5736         gen_op_ld_v(s, MO_16, s->T0, s->A0);
5737         gen_movl_seg_T0(s, op);
5738         /* then put the data */
5739         gen_op_mov_reg_v(s, ot, reg, s->T1);
5740         if (s->base.is_jmp) {
5741             gen_jmp_im(s, s->pc - s->cs_base);
5742             gen_eob(s);
5743         }
5744         break;
5745 
5746         /************************/
5747         /* shifts */
5748     case 0xc0:
5749     case 0xc1:
5750         /* shift Ev,Ib */
5751         shift = 2;
5752     grp2:
5753         {
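            /*
             * Shared GRP2 decoder: shift == 0 means the count is in CL,
             * shift == 1 means an implicit count of 1, and shift == 2
             * means an immediate count byte follows.
             */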
5754             ot = mo_b_d(b, dflag);
5755             modrm = x86_ldub_code(env, s);
5756             mod = (modrm >> 6) & 3;
5757             op = (modrm >> 3) & 7;
5758 
5759             if (mod != 3) {
5760                 if (shift == 2) {
5761                     s->rip_offset = 1;
5762                 }
5763                 gen_lea_modrm(env, s, modrm);
5764                 opreg = OR_TMP0;
5765             } else {
5766                 opreg = (modrm & 7) | REX_B(s);
5767             }
5768 
5769             /* simpler op */
5770             if (shift == 0) {
5771                 gen_shift(s, op, ot, opreg, OR_ECX);
5772             } else {
5773                 if (shift == 2) {
5774                     shift = x86_ldub_code(env, s);
5775                 }
5776                 gen_shifti(s, op, ot, opreg, shift);
5777             }
5778         }
5779         break;
5780     case 0xd0:
5781     case 0xd1:
5782         /* shift Ev,1 */
5783         shift = 1;
5784         goto grp2;
5785     case 0xd2:
5786     case 0xd3:
5787         /* shift Ev,cl */
5788         shift = 0;
5789         goto grp2;
5790 
5791     case 0x1a4: /* shld imm */
5792         op = 0;
5793         shift = 1;
5794         goto do_shiftd;
5795     case 0x1a5: /* shld cl */
5796         op = 0;
5797         shift = 0;
5798         goto do_shiftd;
5799     case 0x1ac: /* shrd imm */
5800         op = 1;
5801         shift = 1;
5802         goto do_shiftd;
5803     case 0x1ad: /* shrd cl */
5804         op = 1;
5805         shift = 0;
5806     do_shiftd:
5807         ot = dflag;
5808         modrm = x86_ldub_code(env, s);
5809         mod = (modrm >> 6) & 3;
5810         rm = (modrm & 7) | REX_B(s);
5811         reg = ((modrm >> 3) & 7) | rex_r;
5812         if (mod != 3) {
5813             gen_lea_modrm(env, s, modrm);
5814             opreg = OR_TMP0;
5815         } else {
5816             opreg = rm;
5817         }
5818         gen_op_mov_v_reg(s, ot, s->T1, reg);
5819 
5820         if (shift) {
5821             TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
5822             gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5823             tcg_temp_free(imm);
5824         } else {
5825             gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5826         }
5827         break;
5828 
5829         /************************/
5830         /* floats */
5831     case 0xd8 ... 0xdf:
5832         if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5833             /* if CR0.EM or CR0.TS are set, generate an FPU exception */
5834             /* XXX: what should we do on an illegal op? */
5835             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5836             break;
5837         }
5838         modrm = x86_ldub_code(env, s);
5839         mod = (modrm >> 6) & 3;
5840         rm = modrm & 7;
5841         op = ((b & 7) << 3) | ((modrm >> 3) & 7);
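        /*
         * Fold the low 3 bits of the D8-DF escape byte with the modrm
         * reg field into a single 6-bit FPU op index for the switches
         * below.
         */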
5842         if (mod != 3) {
5843             /* memory op */
5844             gen_lea_modrm(env, s, modrm);
5845             switch(op) {
5846             case 0x00 ... 0x07: /* fxxxs */
5847             case 0x10 ... 0x17: /* fixxxl */
5848             case 0x20 ... 0x27: /* fxxxl */
5849             case 0x30 ... 0x37: /* fixxx */
5850                 {
5851                     int op1;
5852                     op1 = op & 7;
5853 
5854                     switch(op >> 4) {
5855                     case 0:
5856                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5857                                             s->mem_index, MO_LEUL);
5858                         gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
5859                         break;
5860                     case 1:
5861                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5862                                             s->mem_index, MO_LEUL);
5863                         gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5864                         break;
5865                     case 2:
5866                         tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5867                                             s->mem_index, MO_LEQ);
5868                         gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
5869                         break;
5870                     case 3:
5871                     default:
5872                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5873                                             s->mem_index, MO_LESW);
5874                         gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5875                         break;
5876                     }
5877 
5878                     gen_helper_fp_arith_ST0_FT0(op1);
5879                     if (op1 == 3) {
5880                         /* fcomp needs pop */
5881                         gen_helper_fpop(cpu_env);
5882                     }
5883                 }
5884                 break;
5885             case 0x08: /* flds */
5886             case 0x0a: /* fsts */
5887             case 0x0b: /* fstps */
5888             case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5889             case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5890             case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5891                 switch(op & 7) {
5892                 case 0:
5893                     switch(op >> 4) {
5894                     case 0:
5895                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5896                                             s->mem_index, MO_LEUL);
5897                         gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
5898                         break;
5899                     case 1:
5900                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5901                                             s->mem_index, MO_LEUL);
5902                         gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5903                         break;
5904                     case 2:
5905                         tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5906                                             s->mem_index, MO_LEQ);
5907                         gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
5908                         break;
5909                     case 3:
5910                     default:
5911                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5912                                             s->mem_index, MO_LESW);
5913                         gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5914                         break;
5915                     }
5916                     break;
5917                 case 1:
5918                     /* XXX: the SSE3 (fisttp) CPUID bit should be tested! */
5919                     switch(op >> 4) {
5920                     case 1:
5921                         gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
5922                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5923                                             s->mem_index, MO_LEUL);
5924                         break;
5925                     case 2:
5926                         gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
5927                         tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
5928                                             s->mem_index, MO_LEQ);
5929                         break;
5930                     case 3:
5931                     default:
5932                         gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
5933                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5934                                             s->mem_index, MO_LEUW);
5935                         break;
5936                     }
5937                     gen_helper_fpop(cpu_env);
5938                     break;
5939                 default:
5940                     switch(op >> 4) {
5941                     case 0:
5942                         gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
5943                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5944                                             s->mem_index, MO_LEUL);
5945                         break;
5946                     case 1:
5947                         gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
5948                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5949                                             s->mem_index, MO_LEUL);
5950                         break;
5951                     case 2:
5952                         gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
5953                         tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
5954                                             s->mem_index, MO_LEQ);
5955                         break;
5956                     case 3:
5957                     default:
5958                         gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
5959                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5960                                             s->mem_index, MO_LEUW);
5961                         break;
5962                     }
5963                     if ((op & 7) == 3)
5964                         gen_helper_fpop(cpu_env);
5965                     break;
5966                 }
5967                 break;
5968             case 0x0c: /* fldenv mem */
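                     /* Comment added for clarity: dflag - 1 selects the
                        environment layout for the helper (0 for the 16-bit
                        format, nonzero for the 32-bit one).  */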
5969                 gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5970                 break;
5971             case 0x0d: /* fldcw mem */
5972                 tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5973                                     s->mem_index, MO_LEUW);
5974                 gen_helper_fldcw(cpu_env, s->tmp2_i32);
5975                 break;
5976             case 0x0e: /* fnstenv mem */
5977                 gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5978                 break;
5979             case 0x0f: /* fnstcw mem */
5980                 gen_helper_fnstcw(s->tmp2_i32, cpu_env);
5981                 tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5982                                     s->mem_index, MO_LEUW);
5983                 break;
5984             case 0x1d: /* fldt mem */
5985                 gen_helper_fldt_ST0(cpu_env, s->A0);
5986                 break;
5987             case 0x1f: /* fstpt mem */
5988                 gen_helper_fstt_ST0(cpu_env, s->A0);
5989                 gen_helper_fpop(cpu_env);
5990                 break;
5991             case 0x2c: /* frstor mem */
5992                 gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5993                 break;
5994             case 0x2e: /* fnsave mem */
5995                 gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5996                 break;
5997             case 0x2f: /* fnstsw mem */
5998                 gen_helper_fnstsw(s->tmp2_i32, cpu_env);
5999                 tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6000                                     s->mem_index, MO_LEUW);
6001                 break;
6002             case 0x3c: /* fbld */
6003                 gen_helper_fbld_ST0(cpu_env, s->A0);
6004                 break;
6005             case 0x3e: /* fbstp */
6006                 gen_helper_fbst_ST0(cpu_env, s->A0);
6007                 gen_helper_fpop(cpu_env);
6008                 break;
6009             case 0x3d: /* fildll */
6010                 tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
6011                 gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
6012                 break;
6013             case 0x3f: /* fistpll */
6014                 gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
6015                 tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
6016                 gen_helper_fpop(cpu_env);
6017                 break;
6018             default:
6019                 goto unknown_op;
6020             }
6021         } else {
6022             /* register float ops */
6023             opreg = rm;
6024 
6025             switch(op) {
6026             case 0x08: /* fld sti */
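                     /* Comment added for clarity: fpush moves the stack top
                        first, so the register that was ST(i) before the push
                        is found at (opreg + 1) & 7 afterwards.  */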
6027                 gen_helper_fpush(cpu_env);
6028                 gen_helper_fmov_ST0_STN(cpu_env,
6029                                         tcg_const_i32((opreg + 1) & 7));
6030                 break;
6031             case 0x09: /* fxchg sti */
6032             case 0x29: /* fxchg4 sti, undocumented op */
6033             case 0x39: /* fxchg7 sti, undocumented op */
6034                 gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
6035                 break;
6036             case 0x0a: /* grp d9/2 */
6037                 switch(rm) {
6038                 case 0: /* fnop */
6039                     /* check exceptions (FreeBSD FPU probe) */
6040                     gen_helper_fwait(cpu_env);
6041                     break;
6042                 default:
6043                     goto unknown_op;
6044                 }
6045                 break;
6046             case 0x0c: /* grp d9/4 */
6047                 switch(rm) {
6048                 case 0: /* fchs */
6049                     gen_helper_fchs_ST0(cpu_env);
6050                     break;
6051                 case 1: /* fabs */
6052                     gen_helper_fabs_ST0(cpu_env);
6053                     break;
6054                 case 4: /* ftst */
6055                     gen_helper_fldz_FT0(cpu_env);
6056                     gen_helper_fcom_ST0_FT0(cpu_env);
6057                     break;
6058                 case 5: /* fxam */
6059                     gen_helper_fxam_ST0(cpu_env);
6060                     break;
6061                 default:
6062                     goto unknown_op;
6063                 }
6064                 break;
6065             case 0x0d: /* grp d9/5 */
6066                 {
6067                     switch(rm) {
6068                     case 0:
6069                         gen_helper_fpush(cpu_env);
6070                         gen_helper_fld1_ST0(cpu_env);
6071                         break;
6072                     case 1:
6073                         gen_helper_fpush(cpu_env);
6074                         gen_helper_fldl2t_ST0(cpu_env);
6075                         break;
6076                     case 2:
6077                         gen_helper_fpush(cpu_env);
6078                         gen_helper_fldl2e_ST0(cpu_env);
6079                         break;
6080                     case 3:
6081                         gen_helper_fpush(cpu_env);
6082                         gen_helper_fldpi_ST0(cpu_env);
6083                         break;
6084                     case 4:
6085                         gen_helper_fpush(cpu_env);
6086                         gen_helper_fldlg2_ST0(cpu_env);
6087                         break;
6088                     case 5:
6089                         gen_helper_fpush(cpu_env);
6090                         gen_helper_fldln2_ST0(cpu_env);
6091                         break;
6092                     case 6:
6093                         gen_helper_fpush(cpu_env);
6094                         gen_helper_fldz_ST0(cpu_env);
6095                         break;
6096                     default:
6097                         goto unknown_op;
6098                     }
6099                 }
6100                 break;
6101             case 0x0e: /* grp d9/6 */
6102                 switch(rm) {
6103                 case 0: /* f2xm1 */
6104                     gen_helper_f2xm1(cpu_env);
6105                     break;
6106                 case 1: /* fyl2x */
6107                     gen_helper_fyl2x(cpu_env);
6108                     break;
6109                 case 2: /* fptan */
6110                     gen_helper_fptan(cpu_env);
6111                     break;
6112                 case 3: /* fpatan */
6113                     gen_helper_fpatan(cpu_env);
6114                     break;
6115                 case 4: /* fxtract */
6116                     gen_helper_fxtract(cpu_env);
6117                     break;
6118                 case 5: /* fprem1 */
6119                     gen_helper_fprem1(cpu_env);
6120                     break;
6121                 case 6: /* fdecstp */
6122                     gen_helper_fdecstp(cpu_env);
6123                     break;
6124                 default:
6125                 case 7: /* fincstp */
6126                     gen_helper_fincstp(cpu_env);
6127                     break;
6128                 }
6129                 break;
6130             case 0x0f: /* grp d9/7 */
6131                 switch(rm) {
6132                 case 0: /* fprem */
6133                     gen_helper_fprem(cpu_env);
6134                     break;
6135                 case 1: /* fyl2xp1 */
6136                     gen_helper_fyl2xp1(cpu_env);
6137                     break;
6138                 case 2: /* fsqrt */
6139                     gen_helper_fsqrt(cpu_env);
6140                     break;
6141                 case 3: /* fsincos */
6142                     gen_helper_fsincos(cpu_env);
6143                     break;
6144                 case 5: /* fscale */
6145                     gen_helper_fscale(cpu_env);
6146                     break;
6147                 case 4: /* frndint */
6148                     gen_helper_frndint(cpu_env);
6149                     break;
6150                 case 6: /* fsin */
6151                     gen_helper_fsin(cpu_env);
6152                     break;
6153                 default:
6154                 case 7: /* fcos */
6155                     gen_helper_fcos(cpu_env);
6156                     break;
6157                 }
6158                 break;
6159             case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6160             case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6161             case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6162                 {
6163                     int op1;
6164 
6165                     op1 = op & 7;
6166                     if (op >= 0x20) {
6167                         gen_helper_fp_arith_STN_ST0(op1, opreg);
6168                         if (op >= 0x30)
6169                             gen_helper_fpop(cpu_env);
6170                     } else {
6171                         gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6172                         gen_helper_fp_arith_ST0_FT0(op1);
6173                     }
6174                 }
6175                 break;
6176             case 0x02: /* fcom */
6177             case 0x22: /* fcom2, undocumented op */
6178                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6179                 gen_helper_fcom_ST0_FT0(cpu_env);
6180                 break;
6181             case 0x03: /* fcomp */
6182             case 0x23: /* fcomp3, undocumented op */
6183             case 0x32: /* fcomp5, undocumented op */
6184                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6185                 gen_helper_fcom_ST0_FT0(cpu_env);
6186                 gen_helper_fpop(cpu_env);
6187                 break;
6188             case 0x15: /* da/5 */
6189                 switch(rm) {
6190                 case 1: /* fucompp */
6191                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6192                     gen_helper_fucom_ST0_FT0(cpu_env);
6193                     gen_helper_fpop(cpu_env);
6194                     gen_helper_fpop(cpu_env);
6195                     break;
6196                 default:
6197                     goto unknown_op;
6198                 }
6199                 break;
6200             case 0x1c:
6201                 switch(rm) {
6202                 case 0: /* feni (287 only, just do nop here) */
6203                     break;
6204                 case 1: /* fdisi (287 only, just do nop here) */
6205                     break;
6206                 case 2: /* fclex */
6207                     gen_helper_fclex(cpu_env);
6208                     break;
6209                 case 3: /* fninit */
6210                     gen_helper_fninit(cpu_env);
6211                     break;
6212                 case 4: /* fsetpm (287 only, just do nop here) */
6213                     break;
6214                 default:
6215                     goto unknown_op;
6216                 }
6217                 break;
6218             case 0x1d: /* fucomi */
6219                 if (!(s->cpuid_features & CPUID_CMOV)) {
6220                     goto illegal_op;
6221                 }
6222                 gen_update_cc_op(s);
6223                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6224                 gen_helper_fucomi_ST0_FT0(cpu_env);
6225                 set_cc_op(s, CC_OP_EFLAGS);
6226                 break;
6227             case 0x1e: /* fcomi */
6228                 if (!(s->cpuid_features & CPUID_CMOV)) {
6229                     goto illegal_op;
6230                 }
6231                 gen_update_cc_op(s);
6232                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6233                 gen_helper_fcomi_ST0_FT0(cpu_env);
6234                 set_cc_op(s, CC_OP_EFLAGS);
6235                 break;
6236             case 0x28: /* ffree sti */
6237                 gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6238                 break;
6239             case 0x2a: /* fst sti */
6240                 gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6241                 break;
6242             case 0x2b: /* fstp sti */
6243             case 0x0b: /* fstp1 sti, undocumented op */
6244             case 0x3a: /* fstp8 sti, undocumented op */
6245             case 0x3b: /* fstp9 sti, undocumented op */
6246                 gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6247                 gen_helper_fpop(cpu_env);
6248                 break;
6249             case 0x2c: /* fucom st(i) */
6250                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6251                 gen_helper_fucom_ST0_FT0(cpu_env);
6252                 break;
6253             case 0x2d: /* fucomp st(i) */
6254                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6255                 gen_helper_fucom_ST0_FT0(cpu_env);
6256                 gen_helper_fpop(cpu_env);
6257                 break;
6258             case 0x33: /* de/3 */
6259                 switch(rm) {
6260                 case 1: /* fcompp */
6261                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6262                     gen_helper_fcom_ST0_FT0(cpu_env);
6263                     gen_helper_fpop(cpu_env);
6264                     gen_helper_fpop(cpu_env);
6265                     break;
6266                 default:
6267                     goto unknown_op;
6268                 }
6269                 break;
6270             case 0x38: /* ffreep sti, undocumented op */
6271                 gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6272                 gen_helper_fpop(cpu_env);
6273                 break;
6274             case 0x3c: /* df/4 */
6275                 switch(rm) {
6276                 case 0:
6277                     gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6278                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
6279                     gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
6280                     break;
6281                 default:
6282                     goto unknown_op;
6283                 }
6284                 break;
6285             case 0x3d: /* fucomip */
6286                 if (!(s->cpuid_features & CPUID_CMOV)) {
6287                     goto illegal_op;
6288                 }
6289                 gen_update_cc_op(s);
6290                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6291                 gen_helper_fucomi_ST0_FT0(cpu_env);
6292                 gen_helper_fpop(cpu_env);
6293                 set_cc_op(s, CC_OP_EFLAGS);
6294                 break;
6295             case 0x3e: /* fcomip */
6296                 if (!(s->cpuid_features & CPUID_CMOV)) {
6297                     goto illegal_op;
6298                 }
6299                 gen_update_cc_op(s);
6300                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6301                 gen_helper_fcomi_ST0_FT0(cpu_env);
6302                 gen_helper_fpop(cpu_env);
6303                 set_cc_op(s, CC_OP_EFLAGS);
6304                 break;
6305             case 0x10 ... 0x13: /* fcmovxx */
6306             case 0x18 ... 0x1b:
6307                 {
6308                     int op1;
6309                     TCGLabel *l1;
6310                     static const uint8_t fcmov_cc[8] = {
6311                         (JCC_B << 1),
6312                         (JCC_Z << 1),
6313                         (JCC_BE << 1),
6314                         (JCC_P << 1),
6315                     };
6316 
6317                     if (!(s->cpuid_features & CPUID_CMOV)) {
6318                         goto illegal_op;
6319                     }
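                         /* Comment added for clarity: the low two opcode bits
                            pick the base condition (B, Z, BE, P) and opcode
                            bit 3 selects the negated form; the jcc below
                            branches over the fmov when the move condition
                            is false.  */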
6320                     op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6321                     l1 = gen_new_label();
6322                     gen_jcc1_noeob(s, op1, l1);
6323                     gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6324                     gen_set_label(l1);
6325                 }
6326                 break;
6327             default:
6328                 goto unknown_op;
6329             }
6330         }
6331         break;
6332         /************************/
6333         /* string ops */
6334 
6335     case 0xa4: /* movsS */
6336     case 0xa5:
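             /* Comment added for clarity: the low opcode bit selects a byte
                op (0) or a dflag-sized op (1); the rep-prefixed forms get the
                current and next EIP so the generated loop can re-execute the
                insn or fall through.  */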
6337         ot = mo_b_d(b, dflag);
6338         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6339             gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6340         } else {
6341             gen_movs(s, ot);
6342         }
6343         break;
6344 
6345     case 0xaa: /* stosS */
6346     case 0xab:
6347         ot = mo_b_d(b, dflag);
6348         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6349             gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6350         } else {
6351             gen_stos(s, ot);
6352         }
6353         break;
6354     case 0xac: /* lodsS */
6355     case 0xad:
6356         ot = mo_b_d(b, dflag);
6357         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6358             gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6359         } else {
6360             gen_lods(s, ot);
6361         }
6362         break;
6363     case 0xae: /* scasS */
6364     case 0xaf:
6365         ot = mo_b_d(b, dflag);
6366         if (prefixes & PREFIX_REPNZ) {
6367             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6368         } else if (prefixes & PREFIX_REPZ) {
6369             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6370         } else {
6371             gen_scas(s, ot);
6372         }
6373         break;
6374 
6375     case 0xa6: /* cmpsS */
6376     case 0xa7:
6377         ot = mo_b_d(b, dflag);
6378         if (prefixes & PREFIX_REPNZ) {
6379             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6380         } else if (prefixes & PREFIX_REPZ) {
6381             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6382         } else {
6383             gen_cmps(s, ot);
6384         }
6385         break;
6386     case 0x6c: /* insS */
6387     case 0x6d:
6388         ot = mo_b_d32(b, dflag);
6389         tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6390         gen_check_io(s, ot, pc_start - s->cs_base,
6391                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
6392         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6393             gen_io_start();
6394         }
6395         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6396             gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6397             /* jump generated by gen_repz_ins */
6398         } else {
6399             gen_ins(s, ot);
6400             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6401                 gen_jmp(s, s->pc - s->cs_base);
6402             }
6403         }
6404         break;
6405     case 0x6e: /* outsS */
6406     case 0x6f:
6407         ot = mo_b_d32(b, dflag);
6408         tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6409         gen_check_io(s, ot, pc_start - s->cs_base,
6410                      svm_is_rep(prefixes) | 4);
6411         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6412             gen_io_start();
6413         }
6414         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6415             gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6416             /* jump generated by gen_repz_outs */
6417         } else {
6418             gen_outs(s, ot);
6419             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6420                 gen_jmp(s, s->pc - s->cs_base);
6421             }
6422         }
6423         break;
6424 
6425         /************************/
6426         /* port I/O */
6427 
6428     case 0xe4:
6429     case 0xe5:
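             /* in AL/eAX, imm8 */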
6430         ot = mo_b_d32(b, dflag);
6431         val = x86_ldub_code(env, s);
6432         tcg_gen_movi_tl(s->T0, val);
6433         gen_check_io(s, ot, pc_start - s->cs_base,
6434                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6435         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6436             gen_io_start();
6437         }
6438         tcg_gen_movi_i32(s->tmp2_i32, val);
6439         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6440         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6441         gen_bpt_io(s, s->tmp2_i32, ot);
6442         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6443             gen_jmp(s, s->pc - s->cs_base);
6444         }
6445         break;
6446     case 0xe6:
6447     case 0xe7:
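             /* out imm8, AL/eAX */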
6448         ot = mo_b_d32(b, dflag);
6449         val = x86_ldub_code(env, s);
6450         tcg_gen_movi_tl(s->T0, val);
6451         gen_check_io(s, ot, pc_start - s->cs_base,
6452                      svm_is_rep(prefixes));
6453         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6454 
6455         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6456             gen_io_start();
6457         }
6458         tcg_gen_movi_i32(s->tmp2_i32, val);
6459         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6460         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6461         gen_bpt_io(s, s->tmp2_i32, ot);
6462         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6463             gen_jmp(s, s->pc - s->cs_base);
6464         }
6465         break;
6466     case 0xec:
6467     case 0xed:
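             /* in AL/eAX, dx */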
6468         ot = mo_b_d32(b, dflag);
6469         tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6470         gen_check_io(s, ot, pc_start - s->cs_base,
6471                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6472         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6473             gen_io_start();
6474         }
6475         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
6476         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6477         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6478         gen_bpt_io(s, s->tmp2_i32, ot);
6479         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6480             gen_jmp(s, s->pc - s->cs_base);
6481         }
6482         break;
6483     case 0xee:
6484     case 0xef:
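             /* out dx, AL/eAX */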
6485         ot = mo_b_d32(b, dflag);
6486         tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6487         gen_check_io(s, ot, pc_start - s->cs_base,
6488                      svm_is_rep(prefixes));
6489         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6490 
6491         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6492             gen_io_start();
6493         }
6494         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
6495         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6496         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6497         gen_bpt_io(s, s->tmp2_i32, ot);
6498         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6499             gen_jmp(s, s->pc - s->cs_base);
6500         }
6501         break;
6502 
6503         /************************/
6504         /* control */
6505     case 0xc2: /* ret im */
6506         val = x86_ldsw_code(env, s);
6507         ot = gen_pop_T0(s);
6508         gen_stack_update(s, val + (1 << ot));
6509         /* Note that gen_pop_T0 uses a zero-extending load.  */
6510         gen_op_jmp_v(s->T0);
6511         gen_bnd_jmp(s);
6512         gen_jr(s, s->T0);
6513         break;
6514     case 0xc3: /* ret */
6515         ot = gen_pop_T0(s);
6516         gen_pop_update(s, ot);
6517         /* Note that gen_pop_T0 uses a zero-extending load.  */
6518         gen_op_jmp_v(s->T0);
6519         gen_bnd_jmp(s);
6520         gen_jr(s, s->T0);
6521         break;
6522     case 0xca: /* lret im */
6523         val = x86_ldsw_code(env, s);
6524     do_lret:
6525         if (s->pe && !s->vm86) {
6526             gen_update_cc_op(s);
6527             gen_jmp_im(s, pc_start - s->cs_base);
6528             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6529                                       tcg_const_i32(val));
6530         } else {
6531             gen_stack_A0(s);
6532             /* pop offset */
6533             gen_op_ld_v(s, dflag, s->T0, s->A0);
6534             /* NOTE: keeping EIP updated is not a problem in case of
6535                an exception */
6536             gen_op_jmp_v(s->T0);
6537             /* pop selector */
6538             gen_add_A0_im(s, 1 << dflag);
6539             gen_op_ld_v(s, dflag, s->T0, s->A0);
6540             gen_op_movl_seg_T0_vm(s, R_CS);
6541             /* add stack offset */
6542             gen_stack_update(s, val + (2 << dflag));
6543         }
6544         gen_eob(s);
6545         break;
6546     case 0xcb: /* lret */
6547         val = 0;
6548         goto do_lret;
6549     case 0xcf: /* iret */
6550         gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
6551         if (!s->pe) {
6552             /* real mode */
6553             gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6554             set_cc_op(s, CC_OP_EFLAGS);
6555         } else if (s->vm86) {
6556             if (s->iopl != 3) {
6557                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6558             } else {
6559                 gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6560                 set_cc_op(s, CC_OP_EFLAGS);
6561             }
6562         } else {
6563             gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6564                                       tcg_const_i32(s->pc - s->cs_base));
6565             set_cc_op(s, CC_OP_EFLAGS);
6566         }
6567         gen_eob(s);
6568         break;
6569     case 0xe8: /* call im */
6570         {
6571             if (dflag != MO_16) {
6572                 tval = (int32_t)insn_get(env, s, MO_32);
6573             } else {
6574                 tval = (int16_t)insn_get(env, s, MO_16);
6575             }
6576             next_eip = s->pc - s->cs_base;
6577             tval += next_eip;
6578             if (dflag == MO_16) {
6579                 tval &= 0xffff;
6580             } else if (!CODE64(s)) {
6581                 tval &= 0xffffffff;
6582             }
6583             tcg_gen_movi_tl(s->T0, next_eip);
6584             gen_push_v(s, s->T0);
6585             gen_bnd_jmp(s);
6586             gen_jmp(s, tval);
6587         }
6588         break;
6589     case 0x9a: /* lcall im */
6590         {
6591             unsigned int selector, offset;
6592 
6593             if (CODE64(s))
6594                 goto illegal_op;
6595             ot = dflag;
6596             offset = insn_get(env, s, ot);
6597             selector = insn_get(env, s, MO_16);
6598 
6599             tcg_gen_movi_tl(s->T0, selector);
6600             tcg_gen_movi_tl(s->T1, offset);
6601         }
6602         goto do_lcall;
6603     case 0xe9: /* jmp im */
6604         if (dflag != MO_16) {
6605             tval = (int32_t)insn_get(env, s, MO_32);
6606         } else {
6607             tval = (int16_t)insn_get(env, s, MO_16);
6608         }
6609         tval += s->pc - s->cs_base;
6610         if (dflag == MO_16) {
6611             tval &= 0xffff;
6612         } else if (!CODE64(s)) {
6613             tval &= 0xffffffff;
6614         }
6615         gen_bnd_jmp(s);
6616         gen_jmp(s, tval);
6617         break;
6618     case 0xea: /* ljmp im */
6619         {
6620             unsigned int selector, offset;
6621 
6622             if (CODE64(s))
6623                 goto illegal_op;
6624             ot = dflag;
6625             offset = insn_get(env, s, ot);
6626             selector = insn_get(env, s, MO_16);
6627 
6628             tcg_gen_movi_tl(s->T0, selector);
6629             tcg_gen_movi_tl(s->T1, offset);
6630         }
6631         goto do_ljmp;
6632     case 0xeb: /* jmp Jb */
6633         tval = (int8_t)insn_get(env, s, MO_8);
6634         tval += s->pc - s->cs_base;
6635         if (dflag == MO_16) {
6636             tval &= 0xffff;
6637         }
6638         gen_jmp(s, tval);
6639         break;
6640     case 0x70 ... 0x7f: /* jcc Jb */
6641         tval = (int8_t)insn_get(env, s, MO_8);
6642         goto do_jcc;
6643     case 0x180 ... 0x18f: /* jcc Jv */
6644         if (dflag != MO_16) {
6645             tval = (int32_t)insn_get(env, s, MO_32);
6646         } else {
6647             tval = (int16_t)insn_get(env, s, MO_16);
6648         }
6649     do_jcc:
6650         next_eip = s->pc - s->cs_base;
6651         tval += next_eip;
6652         if (dflag == MO_16) {
6653             tval &= 0xffff;
6654         }
6655         gen_bnd_jmp(s);
6656         gen_jcc(s, b, tval, next_eip);
6657         break;
6658 
6659     case 0x190 ... 0x19f: /* setcc Gv */
6660         modrm = x86_ldub_code(env, s);
6661         gen_setcc1(s, b, s->T0);
6662         gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6663         break;
6664     case 0x140 ... 0x14f: /* cmov Gv, Ev */
6665         if (!(s->cpuid_features & CPUID_CMOV)) {
6666             goto illegal_op;
6667         }
6668         ot = dflag;
6669         modrm = x86_ldub_code(env, s);
6670         reg = ((modrm >> 3) & 7) | rex_r;
6671         gen_cmovcc1(env, s, ot, b, modrm, reg);
6672         break;
6673 
6674         /************************/
6675         /* flags */
6676     case 0x9c: /* pushf */
6677         gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
6678         if (s->vm86 && s->iopl != 3) {
6679             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6680         } else {
6681             gen_update_cc_op(s);
6682             gen_helper_read_eflags(s->T0, cpu_env);
6683             gen_push_v(s, s->T0);
6684         }
6685         break;
6686     case 0x9d: /* popf */
6687         gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
6688         if (s->vm86 && s->iopl != 3) {
6689             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6690         } else {
6691             ot = gen_pop_T0(s);
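                 /* Comment added for clarity: the writable EFLAGS mask
                    depends on privilege.  CPL 0 may also change IF and IOPL,
                    CPL <= IOPL may change IF but not IOPL, and otherwise
                    neither; a 16-bit operand size clips the mask to the
                    low word.  */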
6692             if (s->cpl == 0) {
6693                 if (dflag != MO_16) {
6694                     gen_helper_write_eflags(cpu_env, s->T0,
6695                                             tcg_const_i32((TF_MASK | AC_MASK |
6696                                                            ID_MASK | NT_MASK |
6697                                                            IF_MASK |
6698                                                            IOPL_MASK)));
6699                 } else {
6700                     gen_helper_write_eflags(cpu_env, s->T0,
6701                                             tcg_const_i32((TF_MASK | AC_MASK |
6702                                                            ID_MASK | NT_MASK |
6703                                                            IF_MASK | IOPL_MASK)
6704                                                           & 0xffff));
6705                 }
6706             } else {
6707                 if (s->cpl <= s->iopl) {
6708                     if (dflag != MO_16) {
6709                         gen_helper_write_eflags(cpu_env, s->T0,
6710                                                 tcg_const_i32((TF_MASK |
6711                                                                AC_MASK |
6712                                                                ID_MASK |
6713                                                                NT_MASK |
6714                                                                IF_MASK)));
6715                     } else {
6716                         gen_helper_write_eflags(cpu_env, s->T0,
6717                                                 tcg_const_i32((TF_MASK |
6718                                                                AC_MASK |
6719                                                                ID_MASK |
6720                                                                NT_MASK |
6721                                                                IF_MASK)
6722                                                               & 0xffff));
6723                     }
6724                 } else {
6725                     if (dflag != MO_16) {
6726                         gen_helper_write_eflags(cpu_env, s->T0,
6727                                            tcg_const_i32((TF_MASK | AC_MASK |
6728                                                           ID_MASK | NT_MASK)));
6729                     } else {
6730                         gen_helper_write_eflags(cpu_env, s->T0,
6731                                            tcg_const_i32((TF_MASK | AC_MASK |
6732                                                           ID_MASK | NT_MASK)
6733                                                          & 0xffff));
6734                     }
6735                 }
6736             }
6737             gen_pop_update(s, ot);
6738             set_cc_op(s, CC_OP_EFLAGS);
6739             /* abort translation because TF/AC flag may change */
6740             gen_jmp_im(s, s->pc - s->cs_base);
6741             gen_eob(s);
6742         }
6743         break;
6744     case 0x9e: /* sahf */
6745         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6746             goto illegal_op;
6747         gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
6748         gen_compute_eflags(s);
6749         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6750         tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6751         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
6752         break;
6753     case 0x9f: /* lahf */
6754         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6755             goto illegal_op;
6756         gen_compute_eflags(s);
6757         /* Note: gen_compute_eflags() only gives the condition codes */
6758         tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
6759         gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
6760         break;
6761     case 0xf5: /* cmc */
6762         gen_compute_eflags(s);
6763         tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6764         break;
6765     case 0xf8: /* clc */
6766         gen_compute_eflags(s);
6767         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6768         break;
6769     case 0xf9: /* stc */
6770         gen_compute_eflags(s);
6771         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6772         break;
6773     case 0xfc: /* cld */
6774         tcg_gen_movi_i32(s->tmp2_i32, 1);
6775         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6776         break;
6777     case 0xfd: /* std */
6778         tcg_gen_movi_i32(s->tmp2_i32, -1);
6779         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6780         break;
6781 
6782         /************************/
6783         /* bit operations */
6784     case 0x1ba: /* bt/bts/btr/btc Gv, im */
6785         ot = dflag;
6786         modrm = x86_ldub_code(env, s);
6787         op = (modrm >> 3) & 7;
6788         mod = (modrm >> 6) & 3;
6789         rm = (modrm & 7) | REX_B(s);
6790         if (mod != 3) {
6791             s->rip_offset = 1;
6792             gen_lea_modrm(env, s, modrm);
6793             if (!(s->prefix & PREFIX_LOCK)) {
6794                 gen_op_ld_v(s, ot, s->T0, s->A0);
6795             }
6796         } else {
6797             gen_op_mov_v_reg(s, ot, s->T0, rm);
6798         }
6799         /* load shift */
6800         val = x86_ldub_code(env, s);
6801         tcg_gen_movi_tl(s->T1, val);
6802         if (op < 4)
6803             goto unknown_op;
6804         op -= 4;
6805         goto bt_op;
6806     case 0x1a3: /* bt Gv, Ev */
6807         op = 0;
6808         goto do_btx;
6809     case 0x1ab: /* bts */
6810         op = 1;
6811         goto do_btx;
6812     case 0x1b3: /* btr */
6813         op = 2;
6814         goto do_btx;
6815     case 0x1bb: /* btc */
6816         op = 3;
6817     do_btx:
6818         ot = dflag;
6819         modrm = x86_ldub_code(env, s);
6820         reg = ((modrm >> 3) & 7) | rex_r;
6821         mod = (modrm >> 6) & 3;
6822         rm = (modrm & 7) | REX_B(s);
6823         gen_op_mov_v_reg(s, MO_32, s->T1, reg);
6824         if (mod != 3) {
6825             AddressParts a = gen_lea_modrm_0(env, s, modrm);
6826             /* specific case: we need to add a displacement */
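                 /* Comment added for clarity: the signed bit offset in T1 is
                    turned into a byte displacement: arithmetic shift right by
                    (3 + ot) gives the word index, shifting left by ot gives
                    the byte offset of that word.  */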
6827             gen_exts(ot, s->T1);
6828             tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
6829             tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
6830             tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
6831             gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
6832             if (!(s->prefix & PREFIX_LOCK)) {
6833                 gen_op_ld_v(s, ot, s->T0, s->A0);
6834             }
6835         } else {
6836             gen_op_mov_v_reg(s, ot, s->T0, rm);
6837         }
6838     bt_op:
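             /* Reduce the bit offset modulo the operand width and build
                the single-bit mask in tmp0.  */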
6839         tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
6840         tcg_gen_movi_tl(s->tmp0, 1);
6841         tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
6842         if (s->prefix & PREFIX_LOCK) {
6843             switch (op) {
6844             case 0: /* bt */
6845                 /* Needs no atomic ops; we suppressed the normal
6846                    memory load for LOCK above so do it now.  */
6847                 gen_op_ld_v(s, ot, s->T0, s->A0);
6848                 break;
6849             case 1: /* bts */
6850                 tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
6851                                            s->mem_index, ot | MO_LE);
6852                 break;
6853             case 2: /* btr */
6854                 tcg_gen_not_tl(s->tmp0, s->tmp0);
6855                 tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
6856                                             s->mem_index, ot | MO_LE);
6857                 break;
6858             default:
6859             case 3: /* btc */
6860                 tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
6861                                             s->mem_index, ot | MO_LE);
6862                 break;
6863             }
6864             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6865         } else {
6866             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6867             switch (op) {
6868             case 0: /* bt */
6869                 /* Data already loaded; nothing to do.  */
6870                 break;
6871             case 1: /* bts */
6872                 tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
6873                 break;
6874             case 2: /* btr */
6875                 tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
6876                 break;
6877             default:
6878             case 3: /* btc */
6879                 tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
6880                 break;
6881             }
6882             if (op != 0) {
6883                 if (mod != 3) {
6884                     gen_op_st_v(s, ot, s->T0, s->A0);
6885                 } else {
6886                     gen_op_mov_reg_v(s, ot, rm, s->T0);
6887                 }
6888             }
6889         }
6890 
6891         /* Delay all CC updates until after the store above.  Note that
6892            C is the result of the test, Z is unchanged, and the others
6893            are all undefined.  */
6894         switch (s->cc_op) {
6895         case CC_OP_MULB ... CC_OP_MULQ:
6896         case CC_OP_ADDB ... CC_OP_ADDQ:
6897         case CC_OP_ADCB ... CC_OP_ADCQ:
6898         case CC_OP_SUBB ... CC_OP_SUBQ:
6899         case CC_OP_SBBB ... CC_OP_SBBQ:
6900         case CC_OP_LOGICB ... CC_OP_LOGICQ:
6901         case CC_OP_INCB ... CC_OP_INCQ:
6902         case CC_OP_DECB ... CC_OP_DECQ:
6903         case CC_OP_SHLB ... CC_OP_SHLQ:
6904         case CC_OP_SARB ... CC_OP_SARQ:
6905         case CC_OP_BMILGB ... CC_OP_BMILGQ:
6906             /* Z was going to be computed from the non-zero status of CC_DST.
6907                We can get that same Z value (and the new C value) by leaving
6908                CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
6909                same width.  */
6910             tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
6911             set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
6912             break;
6913         default:
6914             /* Otherwise, generate EFLAGS and replace the C bit.  */
6915             gen_compute_eflags(s);
6916             tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
6917                                ctz32(CC_C), 1);
6918             break;
6919         }
6920         break;
6921     case 0x1bc: /* bsf / tzcnt */
6922     case 0x1bd: /* bsr / lzcnt */
6923         ot = dflag;
6924         modrm = x86_ldub_code(env, s);
6925         reg = ((modrm >> 3) & 7) | rex_r;
6926         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
6927         gen_extu(ot, s->T0);
6928 
6929         /* Note that lzcnt and tzcnt are in different extensions.  */
6930         if ((prefixes & PREFIX_REPZ)
6931             && (b & 1
6932                 ? s->cpuid_ext3_features & CPUID_EXT3_ABM
6933                 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
6934             int size = 8 << ot;
6935             /* For lzcnt/tzcnt, C bit is defined related to the input. */
6936             tcg_gen_mov_tl(cpu_cc_src, s->T0);
6937             if (b & 1) {
6938                 /* For lzcnt, reduce the target_ulong result by the
6939                    number of zeros that we expect to find at the top.  */
6940                 tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
6941                 tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
6942             } else {
6943                 /* For tzcnt, a zero input must return the operand size.  */
6944                 tcg_gen_ctzi_tl(s->T0, s->T0, size);
6945             }
6946             /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
6947             gen_op_update1_cc(s);
6948             set_cc_op(s, CC_OP_BMILGB + ot);
6949         } else {
6950             /* For bsr/bsf, only the Z bit is defined and it is related
6951                to the input and not the result.  */
6952             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
6953             set_cc_op(s, CC_OP_LOGICB + ot);
6954 
6955             /* ??? The manual says that the output is undefined when the
6956                input is zero, but real hardware leaves it unchanged, and
6957                real programs appear to depend on that.  Accomplish this
6958                by passing the output as the value to return upon zero.  */
6959             if (b & 1) {
6960                 /* For bsr, return the bit index of the first 1 bit,
6961                    not the count of leading zeros.  */
6962                 tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
6963                 tcg_gen_clz_tl(s->T0, s->T0, s->T1);
6964                 tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
6965             } else {
6966                 tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
6967             }
6968         }
6969         gen_op_mov_reg_v(s, ot, reg, s->T0);
6970         break;
6971         /************************/
6972         /* bcd */
6973     case 0x27: /* daa */
6974         if (CODE64(s))
6975             goto illegal_op;
6976         gen_update_cc_op(s);
6977         gen_helper_daa(cpu_env);
6978         set_cc_op(s, CC_OP_EFLAGS);
6979         break;
6980     case 0x2f: /* das */
6981         if (CODE64(s))
6982             goto illegal_op;
6983         gen_update_cc_op(s);
6984         gen_helper_das(cpu_env);
6985         set_cc_op(s, CC_OP_EFLAGS);
6986         break;
6987     case 0x37: /* aaa */
6988         if (CODE64(s))
6989             goto illegal_op;
6990         gen_update_cc_op(s);
6991         gen_helper_aaa(cpu_env);
6992         set_cc_op(s, CC_OP_EFLAGS);
6993         break;
6994     case 0x3f: /* aas */
6995         if (CODE64(s))
6996             goto illegal_op;
6997         gen_update_cc_op(s);
6998         gen_helper_aas(cpu_env);
6999         set_cc_op(s, CC_OP_EFLAGS);
7000         break;
7001     case 0xd4: /* aam */
7002         if (CODE64(s))
7003             goto illegal_op;
7004         val = x86_ldub_code(env, s);
7005         if (val == 0) {
7006             gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
7007         } else {
7008             gen_helper_aam(cpu_env, tcg_const_i32(val));
7009             set_cc_op(s, CC_OP_LOGICB);
7010         }
7011         break;
7012     case 0xd5: /* aad */
7013         if (CODE64(s))
7014             goto illegal_op;
7015         val = x86_ldub_code(env, s);
7016         gen_helper_aad(cpu_env, tcg_const_i32(val));
7017         set_cc_op(s, CC_OP_LOGICB);
7018         break;
7019         /************************/
7020         /* misc */
7021     case 0x90: /* nop */
7022         /* XXX: correct lock test for all insns */
7023         if (prefixes & PREFIX_LOCK) {
7024             goto illegal_op;
7025         }
7026         /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
7027         if (REX_B(s)) {
7028             goto do_xchg_reg_eax;
7029         }
7030         if (prefixes & PREFIX_REPZ) {
7031             gen_update_cc_op(s);
7032             gen_jmp_im(s, pc_start - s->cs_base);
7033             gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
7034             s->base.is_jmp = DISAS_NORETURN;
7035         }
7036         break;
7037     case 0x9b: /* fwait */
7038         if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
7039             (HF_MP_MASK | HF_TS_MASK)) {
7040             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7041         } else {
7042             gen_helper_fwait(cpu_env);
7043         }
7044         break;
7045     case 0xcc: /* int3 */
7046         gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
7047         break;
7048     case 0xcd: /* int N */
7049         val = x86_ldub_code(env, s);
7050         if (s->vm86 && s->iopl != 3) {
7051             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7052         } else {
7053             gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
7054         }
7055         break;
7056     case 0xce: /* into */
7057         if (CODE64(s))
7058             goto illegal_op;
7059         gen_update_cc_op(s);
7060         gen_jmp_im(s, pc_start - s->cs_base);
7061         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7062         break;
7063 #ifdef WANT_ICEBP
7064     case 0xf1: /* icebp (undocumented, exits to external debugger) */
7065         gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
7066         gen_debug(s, pc_start - s->cs_base);
7067         break;
7068 #endif
7069     case 0xfa: /* cli */
7070         if (!s->vm86) {
7071             if (s->cpl <= s->iopl) {
7072                 gen_helper_cli(cpu_env);
7073             } else {
7074                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7075             }
7076         } else {
7077             if (s->iopl == 3) {
7078                 gen_helper_cli(cpu_env);
7079             } else {
7080                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7081             }
7082         }
7083         break;
7084     case 0xfb: /* sti */
7085         if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
7086             gen_helper_sti(cpu_env);
7087             /* interrupts are recognized only after the insn following sti */
7088             gen_jmp_im(s, s->pc - s->cs_base);
7089             gen_eob_inhibit_irq(s, true);
7090         } else {
7091             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7092         }
7093         break;
7094     case 0x62: /* bound */
7095         if (CODE64(s))
7096             goto illegal_op;
7097         ot = dflag;
7098         modrm = x86_ldub_code(env, s);
7099         reg = (modrm >> 3) & 7;
7100         mod = (modrm >> 6) & 3;
7101         if (mod == 3)
7102             goto illegal_op;
7103         gen_op_mov_v_reg(s, ot, s->T0, reg);
7104         gen_lea_modrm(env, s, modrm);
7105         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7106         if (ot == MO_16) {
7107             gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
7108         } else {
7109             gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
7110         }
7111         break;
7112     case 0x1c8 ... 0x1cf: /* bswap reg */
7113         reg = (b & 7) | REX_B(s);
7114 #ifdef TARGET_X86_64
7115         if (dflag == MO_64) {
7116             gen_op_mov_v_reg(s, MO_64, s->T0, reg);
7117             tcg_gen_bswap64_i64(s->T0, s->T0);
7118             gen_op_mov_reg_v(s, MO_64, reg, s->T0);
7119         } else
7120 #endif
7121         {
7122             gen_op_mov_v_reg(s, MO_32, s->T0, reg);
7123             tcg_gen_ext32u_tl(s->T0, s->T0);
7124             tcg_gen_bswap32_tl(s->T0, s->T0);
7125             gen_op_mov_reg_v(s, MO_32, reg, s->T0);
7126         }
7127         break;
7128     case 0xd6: /* salc */
7129         if (CODE64(s))
7130             goto illegal_op;
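             /* Comment added for clarity: salc sets AL to 0xff if CF is set
                and to 0 otherwise; negating the 0/1 carry value produces
                that byte mask.  */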
7131         gen_compute_eflags_c(s, s->T0);
7132         tcg_gen_neg_tl(s->T0, s->T0);
7133         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
7134         break;
7135     case 0xe0: /* loopnz */
7136     case 0xe1: /* loopz */
7137     case 0xe2: /* loop */
7138     case 0xe3: /* jecxz */
7139         {
7140             TCGLabel *l1, *l2, *l3;
7141 
7142             tval = (int8_t)insn_get(env, s, MO_8);
7143             next_eip = s->pc - s->cs_base;
7144             tval += next_eip;
7145             if (dflag == MO_16) {
7146                 tval &= 0xffff;
7147             }
7148 
7149             l1 = gen_new_label();
7150             l2 = gen_new_label();
7151             l3 = gen_new_label();
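                 /* Comment added for clarity: l1 is the taken target (jump
                    to tval), l3 the fall-through to next_eip, and l2 the
                    common exit.  */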
7152             gen_update_cc_op(s);
7153             b &= 3;
7154             switch(b) {
7155             case 0: /* loopnz */
7156             case 1: /* loopz */
7157                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7158                 gen_op_jz_ecx(s, s->aflag, l3);
7159                 gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7160                 break;
7161             case 2: /* loop */
7162                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7163                 gen_op_jnz_ecx(s, s->aflag, l1);
7164                 break;
7165             default:
7166             case 3: /* jcxz */
7167                 gen_op_jz_ecx(s, s->aflag, l1);
7168                 break;
7169             }
7170 
7171             gen_set_label(l3);
7172             gen_jmp_im(s, next_eip);
7173             tcg_gen_br(l2);
7174 
7175             gen_set_label(l1);
7176             gen_jmp_im(s, tval);
7177             gen_set_label(l2);
7178             gen_eob(s);
7179         }
7180         break;
7181     case 0x130: /* wrmsr */
7182     case 0x132: /* rdmsr */
7183         if (s->cpl != 0) {
7184             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7185         } else {
7186             gen_update_cc_op(s);
7187             gen_jmp_im(s, pc_start - s->cs_base);
7188             if (b & 2) {
7189                 gen_helper_rdmsr(cpu_env);
7190             } else {
7191                 gen_helper_wrmsr(cpu_env);
7192             }
7193         }
7194         break;
7195     case 0x131: /* rdtsc */
7196         gen_update_cc_op(s);
7197         gen_jmp_im(s, pc_start - s->cs_base);
7198         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7199             gen_io_start();
7200         }
7201         gen_helper_rdtsc(cpu_env);
7202         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7203             gen_jmp(s, s->pc - s->cs_base);
7204         }
7205         break;
7206     case 0x133: /* rdpmc */
7207         gen_update_cc_op(s);
7208         gen_jmp_im(s, pc_start - s->cs_base);
7209         gen_helper_rdpmc(cpu_env);
7210         break;
7211     case 0x134: /* sysenter */
7212         /* For Intel, SYSENTER is valid in 64-bit mode */
7213         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7214             goto illegal_op;
7215         if (!s->pe) {
7216             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7217         } else {
7218             gen_helper_sysenter(cpu_env);
7219             gen_eob(s);
7220         }
7221         break;
7222     case 0x135: /* sysexit */
7223         /* For Intel, SYSEXIT is valid in 64-bit mode */
7224         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7225             goto illegal_op;
7226         if (!s->pe) {
7227             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7228         } else {
7229             gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7230             gen_eob(s);
7231         }
7232         break;
7233 #ifdef TARGET_X86_64
7234     case 0x105: /* syscall */
7235         /* XXX: is it usable in real mode? */
7236         gen_update_cc_op(s);
7237         gen_jmp_im(s, pc_start - s->cs_base);
7238         gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7239         /* TF handling for the syscall insn is different. The TF bit is checked
7240            after the syscall insn completes. This allows #DB to not be
7241            generated after one has entered CPL0 if TF is set in FMASK.  */
7242         gen_eob_worker(s, false, true);
7243         break;
7244     case 0x107: /* sysret */
7245         if (!s->pe) {
7246             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7247         } else {
7248             gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7249             /* condition codes are modified only in long mode */
7250             if (s->lma) {
7251                 set_cc_op(s, CC_OP_EFLAGS);
7252             }
7253             /* TF handling for the sysret insn is different. The TF bit is
7254                checked after the sysret insn completes. This allows #DB to be
7255                generated "as if" the syscall insn in userspace has just
7256                completed.  */
7257             gen_eob_worker(s, false, true);
7258         }
7259         break;
7260 #endif
7261     case 0x1a2: /* cpuid */
7262         gen_update_cc_op(s);
7263         gen_jmp_im(s, pc_start - s->cs_base);
7264         gen_helper_cpuid(cpu_env);
7265         break;
7266     case 0xf4: /* hlt */
7267         if (s->cpl != 0) {
7268             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7269         } else {
7270             gen_update_cc_op(s);
7271             gen_jmp_im(s, pc_start - s->cs_base);
7272             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7273             s->base.is_jmp = DISAS_NORETURN;
7274         }
7275         break;
7276     case 0x100:
7277         modrm = x86_ldub_code(env, s);
7278         mod = (modrm >> 6) & 3;
7279         op = (modrm >> 3) & 7;
7280         switch(op) {
7281         case 0: /* sldt */
7282             if (!s->pe || s->vm86)
7283                 goto illegal_op;
7284             gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
7285             tcg_gen_ld32u_tl(s->T0, cpu_env,
7286                              offsetof(CPUX86State, ldt.selector));
7287             ot = mod == 3 ? dflag : MO_16;
7288             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7289             break;
7290         case 2: /* lldt */
7291             if (!s->pe || s->vm86)
7292                 goto illegal_op;
7293             if (s->cpl != 0) {
7294                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7295             } else {
7296                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
7297                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7298                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7299                 gen_helper_lldt(cpu_env, s->tmp2_i32);
7300             }
7301             break;
7302         case 1: /* str */
7303             if (!s->pe || s->vm86)
7304                 goto illegal_op;
7305             gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
7306             tcg_gen_ld32u_tl(s->T0, cpu_env,
7307                              offsetof(CPUX86State, tr.selector));
7308             ot = mod == 3 ? dflag : MO_16;
7309             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7310             break;
7311         case 3: /* ltr */
7312             if (!s->pe || s->vm86)
7313                 goto illegal_op;
7314             if (s->cpl != 0) {
7315                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7316             } else {
7317                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
7318                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7319                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7320                 gen_helper_ltr(cpu_env, s->tmp2_i32);
7321             }
7322             break;
7323         case 4: /* verr */
7324         case 5: /* verw */
7325             if (!s->pe || s->vm86)
7326                 goto illegal_op;
7327             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7328             gen_update_cc_op(s);
7329             if (op == 4) {
7330                 gen_helper_verr(cpu_env, s->T0);
7331             } else {
7332                 gen_helper_verw(cpu_env, s->T0);
7333             }
7334             set_cc_op(s, CC_OP_EFLAGS);
7335             break;
7336         default:
7337             goto unknown_op;
7338         }
7339         break;
7340 
7341     case 0x101:
7342         modrm = x86_ldub_code(env, s);
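             /*
              * Group 7 (0F 01): switch on the whole ModRM byte, because
              * the mod == 3 encodings (monitor, mwait, vm*, swapgs,
              * rdtscp, ...) are distinct instructions rather than
              * register forms of the memory operations.
              */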
7343         switch (modrm) {
7344         CASE_MODRM_MEM_OP(0): /* sgdt */
7345             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
7346             gen_lea_modrm(env, s, modrm);
7347             tcg_gen_ld32u_tl(s->T0,
7348                              cpu_env, offsetof(CPUX86State, gdt.limit));
7349             gen_op_st_v(s, MO_16, s->T0, s->A0);
7350             gen_add_A0_im(s, 2);
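                 /*
                  * The base is stored as 8 bytes in 64-bit mode and 4
                  * bytes otherwise (CODE64(s) + MO_32 below); with a
                  * 16-bit operand size only 24 bits of the base are kept.
                  */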
7351             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7352             if (dflag == MO_16) {
7353                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7354             }
7355             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7356             break;
7357 
7358         case 0xc8: /* monitor */
7359             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7360                 goto illegal_op;
7361             }
7362             gen_update_cc_op(s);
7363             gen_jmp_im(s, pc_start - s->cs_base);
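                 /*
                  * MONITOR takes its linear address from rAX, truncated
                  * to the current address size, with the DS segment
                  * applied (overridable by a segment prefix).
                  */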
7364             tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7365             gen_extu(s->aflag, s->A0);
7366             gen_add_A0_ds_seg(s);
7367             gen_helper_monitor(cpu_env, s->A0);
7368             break;
7369 
7370         case 0xc9: /* mwait */
7371             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7372                 goto illegal_op;
7373             }
7374             gen_update_cc_op(s);
7375             gen_jmp_im(s, pc_start - s->cs_base);
7376             gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7377             gen_eob(s);
7378             break;
7379 
7380         case 0xca: /* clac */
7381             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7382                 || s->cpl != 0) {
7383                 goto illegal_op;
7384             }
7385             gen_helper_clac(cpu_env);
7386             gen_jmp_im(s, s->pc - s->cs_base);
7387             gen_eob(s);
7388             break;
7389 
7390         case 0xcb: /* stac */
7391             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7392                 || s->cpl != 0) {
7393                 goto illegal_op;
7394             }
7395             gen_helper_stac(cpu_env);
7396             gen_jmp_im(s, s->pc - s->cs_base);
7397             gen_eob(s);
7398             break;
7399 
7400         CASE_MODRM_MEM_OP(1): /* sidt */
7401             gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
7402             gen_lea_modrm(env, s, modrm);
7403             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
7404             gen_op_st_v(s, MO_16, s->T0, s->A0);
7405             gen_add_A0_im(s, 2);
7406             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7407             if (dflag == MO_16) {
7408                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7409             }
7410             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7411             break;
7412 
7413         case 0xd0: /* xgetbv */
7414             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7415                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7416                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7417                 goto illegal_op;
7418             }
7419             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7420             gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
7421             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7422             break;
7423 
7424         case 0xd1: /* xsetbv */
7425             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7426                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7427                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7428                 goto illegal_op;
7429             }
7430             if (s->cpl != 0) {
7431                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7432                 break;
7433             }
7434             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7435                                   cpu_regs[R_EDX]);
7436             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7437             gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
7438             /* End TB because translation flags may change.  */
7439             gen_jmp_im(s, s->pc - s->cs_base);
7440             gen_eob(s);
7441             break;
7442 
7443         case 0xd8: /* VMRUN */
7444             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7445                 goto illegal_op;
7446             }
7447             if (s->cpl != 0) {
7448                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7449                 break;
7450             }
7451             gen_update_cc_op(s);
7452             gen_jmp_im(s, pc_start - s->cs_base);
7453             gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7454                              tcg_const_i32(s->pc - pc_start));
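                 /*
                  * vmrun may switch to the guest context, so return to
                  * the main loop instead of chaining to another TB.
                  */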
7455             tcg_gen_exit_tb(NULL, 0);
7456             s->base.is_jmp = DISAS_NORETURN;
7457             break;
7458 
7459         case 0xd9: /* VMMCALL */
7460             if (!(s->flags & HF_SVME_MASK)) {
7461                 goto illegal_op;
7462             }
7463             gen_update_cc_op(s);
7464             gen_jmp_im(s, pc_start - s->cs_base);
7465             gen_helper_vmmcall(cpu_env);
7466             break;
7467 
7468         case 0xda: /* VMLOAD */
7469             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7470                 goto illegal_op;
7471             }
7472             if (s->cpl != 0) {
7473                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7474                 break;
7475             }
7476             gen_update_cc_op(s);
7477             gen_jmp_im(s, pc_start - s->cs_base);
7478             gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7479             break;
7480 
7481         case 0xdb: /* VMSAVE */
7482             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7483                 goto illegal_op;
7484             }
7485             if (s->cpl != 0) {
7486                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7487                 break;
7488             }
7489             gen_update_cc_op(s);
7490             gen_jmp_im(s, pc_start - s->cs_base);
7491             gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7492             break;
7493 
7494         case 0xdc: /* STGI */
7495             if ((!(s->flags & HF_SVME_MASK)
7496                    && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7497                 || !s->pe) {
7498                 goto illegal_op;
7499             }
7500             if (s->cpl != 0) {
7501                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7502                 break;
7503             }
7504             gen_update_cc_op(s);
7505             gen_helper_stgi(cpu_env);
7506             gen_jmp_im(s, s->pc - s->cs_base);
7507             gen_eob(s);
7508             break;
7509 
7510         case 0xdd: /* CLGI */
7511             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7512                 goto illegal_op;
7513             }
7514             if (s->cpl != 0) {
7515                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7516                 break;
7517             }
7518             gen_update_cc_op(s);
7519             gen_jmp_im(s, pc_start - s->cs_base);
7520             gen_helper_clgi(cpu_env);
7521             break;
7522 
7523         case 0xde: /* SKINIT */
7524             if ((!(s->flags & HF_SVME_MASK)
7525                  && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7526                 || !s->pe) {
7527                 goto illegal_op;
7528             }
7529             gen_update_cc_op(s);
7530             gen_jmp_im(s, pc_start - s->cs_base);
7531             gen_helper_skinit(cpu_env);
7532             break;
7533 
7534         case 0xdf: /* INVLPGA */
7535             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7536                 goto illegal_op;
7537             }
7538             if (s->cpl != 0) {
7539                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7540                 break;
7541             }
7542             gen_update_cc_op(s);
7543             gen_jmp_im(s, pc_start - s->cs_base);
7544             gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
7545             break;
7546 
7547         CASE_MODRM_MEM_OP(2): /* lgdt */
7548             if (s->cpl != 0) {
7549                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7550                 break;
7551             }
7552             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
7553             gen_lea_modrm(env, s, modrm);
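                 /*
                  * Load the 16-bit limit first, then the base (8 bytes
                  * in 64-bit mode, else 4); a 16-bit operand size keeps
                  * only 24 bits of the base.
                  */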
7554             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7555             gen_add_A0_im(s, 2);
7556             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7557             if (dflag == MO_16) {
7558                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7559             }
7560             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7561             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7562             break;
7563 
7564         CASE_MODRM_MEM_OP(3): /* lidt */
7565             if (s->cpl != 0) {
7566                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7567                 break;
7568             }
7569             gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
7570             gen_lea_modrm(env, s, modrm);
7571             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7572             gen_add_A0_im(s, 2);
7573             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7574             if (dflag == MO_16) {
7575                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7576             }
7577             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7578             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
7579             break;
7580 
7581         CASE_MODRM_OP(4): /* smsw */
7582             gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
7583             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
7584             /*
7585              * In 32-bit mode, the higher 16 bits of the destination
7586              * register are undefined.  In practice CR0[31:0] is stored
7587              * just like in 64-bit mode.
7588              */
7589             mod = (modrm >> 6) & 3;
7590             ot = (mod != 3 ? MO_16 : s->dflag);
7591             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7592             break;
7593         case 0xee: /* rdpkru */
7594             if (prefixes & PREFIX_LOCK) {
7595                 goto illegal_op;
7596             }
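                 /*
                  * ECX selects the PKRU "register number"; the helper is
                  * expected to raise #GP for any value other than 0.
                  */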
7597             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7598             gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
7599             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7600             break;
7601         case 0xef: /* wrpkru */
7602             if (prefixes & PREFIX_LOCK) {
7603                 goto illegal_op;
7604             }
7605             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7606                                   cpu_regs[R_EDX]);
7607             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7608             gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
7609             break;
7610         CASE_MODRM_OP(6): /* lmsw */
7611             if (s->cpl != 0) {
7612                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7613                 break;
7614             }
7615             gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7616             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7617             gen_helper_lmsw(cpu_env, s->T0);
7618             gen_jmp_im(s, s->pc - s->cs_base);
7619             gen_eob(s);
7620             break;
7621 
7622         CASE_MODRM_MEM_OP(7): /* invlpg */
7623             if (s->cpl != 0) {
7624                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7625                 break;
7626             }
7627             gen_update_cc_op(s);
7628             gen_jmp_im(s, pc_start - s->cs_base);
7629             gen_lea_modrm(env, s, modrm);
7630             gen_helper_invlpg(cpu_env, s->A0);
7631             gen_jmp_im(s, s->pc - s->cs_base);
7632             gen_eob(s);
7633             break;
7634 
7635         case 0xf8: /* swapgs */
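                 /*
                  * swapgs exchanges the GS base with MSR_KERNEL_GS_BASE;
                  * it exists only in 64-bit mode, hence the CODE64 check
                  * below.
                  */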
7636 #ifdef TARGET_X86_64
7637             if (CODE64(s)) {
7638                 if (s->cpl != 0) {
7639                     gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7640                 } else {
7641                     tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
7642                     tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7643                                   offsetof(CPUX86State, kernelgsbase));
7644                     tcg_gen_st_tl(s->T0, cpu_env,
7645                                   offsetof(CPUX86State, kernelgsbase));
7646                 }
7647                 break;
7648             }
7649 #endif
7650             goto illegal_op;
7651 
7652         case 0xf9: /* rdtscp */
7653             if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7654                 goto illegal_op;
7655             }
7656             gen_update_cc_op(s);
7657             gen_jmp_im(s, pc_start - s->cs_base);
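                 /*
                  * With icount enabled the TSC read counts as an I/O
                  * operation: open an I/O window around the helper and
                  * end the TB so the instruction count stays exact.
                  */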
7658             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7659                 gen_io_start();
7660             }
7661             gen_helper_rdtscp(cpu_env);
7662             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7663                 gen_jmp(s, s->pc - s->cs_base);
7664             }
7665             break;
7666 
7667         default:
7668             goto unknown_op;
7669         }
7670         break;
7671 
7672     case 0x108: /* invd */
7673     case 0x109: /* wbinvd */
7674         if (s->cpl != 0) {
7675             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7676         } else {
7677             gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7678             /* nothing to do */
7679         }
7680         break;
7681     case 0x63: /* arpl or movslS (x86_64) */
7682 #ifdef TARGET_X86_64
7683         if (CODE64(s)) {
7684             int d_ot;
7685             /* d_ot is the size of the destination */
7686             d_ot = dflag;
7687 
7688             modrm = x86_ldub_code(env, s);
7689             reg = ((modrm >> 3) & 7) | rex_r;
7690             mod = (modrm >> 6) & 3;
7691             rm = (modrm & 7) | REX_B(s);
7692 
7693             if (mod == 3) {
7694                 gen_op_mov_v_reg(s, MO_32, s->T0, rm);
7695                 /* sign extend */
7696                 if (d_ot == MO_64) {
7697                     tcg_gen_ext32s_tl(s->T0, s->T0);
7698                 }
7699                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7700             } else {
7701                 gen_lea_modrm(env, s, modrm);
7702                 gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
7703                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7704             }
7705         } else
7706 #endif
7707         {
7708             TCGLabel *label1;
7709             TCGv t0, t1, t2, a0;
7710 
7711             if (!s->pe || s->vm86)
7712                 goto illegal_op;
7713             t0 = tcg_temp_local_new();
7714             t1 = tcg_temp_local_new();
7715             t2 = tcg_temp_local_new();
7716             ot = MO_16;
7717             modrm = x86_ldub_code(env, s);
7718             reg = (modrm >> 3) & 7;
7719             mod = (modrm >> 6) & 3;
7720             rm = modrm & 7;
7721             if (mod != 3) {
7722                 gen_lea_modrm(env, s, modrm);
7723                 gen_op_ld_v(s, ot, t0, s->A0);
7724                 a0 = tcg_temp_local_new();
7725                 tcg_gen_mov_tl(a0, s->A0);
7726             } else {
7727                 gen_op_mov_v_reg(s, ot, t0, rm);
7728                 a0 = NULL;
7729             }
7730             gen_op_mov_v_reg(s, ot, t1, reg);
7731             tcg_gen_andi_tl(s->tmp0, t0, 3);
7732             tcg_gen_andi_tl(t1, t1, 3);
7733             tcg_gen_movi_tl(t2, 0);
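                 /*
                  * arpl: if the destination selector's RPL (bits 1:0) is
                  * lower than the source's, raise it to the source RPL
                  * and set ZF (t2 = CC_Z); otherwise ZF is cleared.
                  */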
7734             label1 = gen_new_label();
7735             tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
7736             tcg_gen_andi_tl(t0, t0, ~3);
7737             tcg_gen_or_tl(t0, t0, t1);
7738             tcg_gen_movi_tl(t2, CC_Z);
7739             gen_set_label(label1);
7740             if (mod != 3) {
7741                 gen_op_st_v(s, ot, t0, a0);
7742                 tcg_temp_free(a0);
7743             } else {
7744                 gen_op_mov_reg_v(s, ot, rm, t0);
7745             }
7746             gen_compute_eflags(s);
7747             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7748             tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7749             tcg_temp_free(t0);
7750             tcg_temp_free(t1);
7751             tcg_temp_free(t2);
7752         }
7753         break;
7754     case 0x102: /* lar */
7755     case 0x103: /* lsl */
7756         {
7757             TCGLabel *label1;
7758             TCGv t0;
7759             if (!s->pe || s->vm86)
7760                 goto illegal_op;
7761             ot = dflag != MO_16 ? MO_32 : MO_16;
7762             modrm = x86_ldub_code(env, s);
7763             reg = ((modrm >> 3) & 7) | rex_r;
7764             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7765             t0 = tcg_temp_local_new();
7766             gen_update_cc_op(s);
7767             if (b == 0x102) {
7768                 gen_helper_lar(t0, cpu_env, s->T0);
7769             } else {
7770                 gen_helper_lsl(t0, cpu_env, s->T0);
7771             }
7772             tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
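                 /*
                  * The helpers set ZF in cc_src on success; the
                  * destination register is written only when ZF is set,
                  * matching the architected behaviour of lar/lsl.
                  */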
7773             label1 = gen_new_label();
7774             tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
7775             gen_op_mov_reg_v(s, ot, reg, t0);
7776             gen_set_label(label1);
7777             set_cc_op(s, CC_OP_EFLAGS);
7778             tcg_temp_free(t0);
7779         }
7780         break;
7781     case 0x118:
7782         modrm = x86_ldub_code(env, s);
7783         mod = (modrm >> 6) & 3;
7784         op = (modrm >> 3) & 7;
7785         switch(op) {
7786         case 0: /* prefetchnta */
7787         case 1: /* prefetcht0 */
7788         case 2: /* prefetcht1 */
7789         case 3: /* prefetcht2 */
7790             if (mod == 3)
7791                 goto illegal_op;
7792             gen_nop_modrm(env, s, modrm);
7793             /* nothing more to do */
7794             break;
7795         default: /* nop (multi byte) */
7796             gen_nop_modrm(env, s, modrm);
7797             break;
7798         }
7799         break;
7800     case 0x11a:
7801         modrm = x86_ldub_code(env, s);
7802         if (s->flags & HF_MPX_EN_MASK) {
7803             mod = (modrm >> 6) & 3;
7804             reg = ((modrm >> 3) & 7) | rex_r;
7805             if (prefixes & PREFIX_REPZ) {
7806                 /* bndcl */
7807                 if (reg >= 4
7808                     || (prefixes & PREFIX_LOCK)
7809                     || s->aflag == MO_16) {
7810                     goto illegal_op;
7811                 }
7812                 gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7813             } else if (prefixes & PREFIX_REPNZ) {
7814                 /* bndcu */
7815                 if (reg >= 4
7816                     || (prefixes & PREFIX_LOCK)
7817                     || s->aflag == MO_16) {
7818                     goto illegal_op;
7819                 }
7820                 TCGv_i64 notu = tcg_temp_new_i64();
7821                 tcg_gen_not_i64(notu, cpu_bndu[reg]);
7822                 gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7823                 tcg_temp_free_i64(notu);
7824             } else if (prefixes & PREFIX_DATA) {
7825                 /* bndmov -- from reg/mem */
7826                 if (reg >= 4 || s->aflag == MO_16) {
7827                     goto illegal_op;
7828                 }
7829                 if (mod == 3) {
7830                     int reg2 = (modrm & 7) | REX_B(s);
7831                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7832                         goto illegal_op;
7833                     }
7834                     if (s->flags & HF_MPX_IU_MASK) {
7835                         tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7836                         tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7837                     }
7838                 } else {
7839                     gen_lea_modrm(env, s, modrm);
7840                     if (CODE64(s)) {
7841                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7842                                             s->mem_index, MO_LEQ);
7843                         tcg_gen_addi_tl(s->A0, s->A0, 8);
7844                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7845                                             s->mem_index, MO_LEQ);
7846                     } else {
7847                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7848                                             s->mem_index, MO_LEUL);
7849                         tcg_gen_addi_tl(s->A0, s->A0, 4);
7850                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7851                                             s->mem_index, MO_LEUL);
7852                     }
7853                     /* bnd registers are now in-use */
7854                     gen_set_hflag(s, HF_MPX_IU_MASK);
7855                 }
7856             } else if (mod != 3) {
7857                 /* bndldx */
7858                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7859                 if (reg >= 4
7860                     || (prefixes & PREFIX_LOCK)
7861                     || s->aflag == MO_16
7862                     || a.base < -1) {
7863                     goto illegal_op;
7864                 }
7865                 if (a.base >= 0) {
7866                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7867                 } else {
7868                     tcg_gen_movi_tl(s->A0, 0);
7869                 }
7870                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7871                 if (a.index >= 0) {
7872                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7873                 } else {
7874                     tcg_gen_movi_tl(s->T0, 0);
7875                 }
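                     /*
                      * bndldx64 returns the lower bound and leaves the
                      * upper bound in mmx_t0; bndldx32 packs both 32-bit
                      * bounds into one 64-bit value, unpacked below.
                      */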
7876                 if (CODE64(s)) {
7877                     gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
7878                     tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7879                                    offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7880                 } else {
7881                     gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
7882                     tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7883                     tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7884                 }
7885                 gen_set_hflag(s, HF_MPX_IU_MASK);
7886             }
7887         }
7888         gen_nop_modrm(env, s, modrm);
7889         break;
7890     case 0x11b:
7891         modrm = x86_ldub_code(env, s);
7892         if (s->flags & HF_MPX_EN_MASK) {
7893             mod = (modrm >> 6) & 3;
7894             reg = ((modrm >> 3) & 7) | rex_r;
7895             if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7896                 /* bndmk */
7897                 if (reg >= 4
7898                     || (prefixes & PREFIX_LOCK)
7899                     || s->aflag == MO_16) {
7900                     goto illegal_op;
7901                 }
7902                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7903                 if (a.base >= 0) {
7904                     tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
7905                     if (!CODE64(s)) {
7906                         tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
7907                     }
7908                 } else if (a.base == -1) {
7909                     /* no base register: the lower bound is 0 */
7910                     tcg_gen_movi_i64(cpu_bndl[reg], 0);
7911                 } else {
7912                     /* rip-relative generates #ud */
7913                     goto illegal_op;
7914                 }
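                     /*
                      * MPX stores the upper bound in one's complement
                      * form, hence the tcg_gen_not_tl on the computed
                      * bound below.
                      */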
7915                 tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
7916                 if (!CODE64(s)) {
7917                     tcg_gen_ext32u_tl(s->A0, s->A0);
7918                 }
7919                 tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
7920                 /* bnd registers are now in-use */
7921                 gen_set_hflag(s, HF_MPX_IU_MASK);
7922                 break;
7923             } else if (prefixes & PREFIX_REPNZ) {
7924                 /* bndcn */
7925                 if (reg >= 4
7926                     || (prefixes & PREFIX_LOCK)
7927                     || s->aflag == MO_16) {
7928                     goto illegal_op;
7929                 }
7930                 gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
7931             } else if (prefixes & PREFIX_DATA) {
7932                 /* bndmov -- to reg/mem */
7933                 if (reg >= 4 || s->aflag == MO_16) {
7934                     goto illegal_op;
7935                 }
7936                 if (mod == 3) {
7937                     int reg2 = (modrm & 7) | REX_B(s);
7938                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7939                         goto illegal_op;
7940                     }
7941                     if (s->flags & HF_MPX_IU_MASK) {
7942                         tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
7943                         tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
7944                     }
7945                 } else {
7946                     gen_lea_modrm(env, s, modrm);
7947                     if (CODE64(s)) {
7948                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
7949                                             s->mem_index, MO_LEQ);
7950                         tcg_gen_addi_tl(s->A0, s->A0, 8);
7951                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
7952                                             s->mem_index, MO_LEQ);
7953                     } else {
7954                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
7955                                             s->mem_index, MO_LEUL);
7956                         tcg_gen_addi_tl(s->A0, s->A0, 4);
7957                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
7958                                             s->mem_index, MO_LEUL);
7959                     }
7960                 }
7961             } else if (mod != 3) {
7962                 /* bndstx */
7963                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7964                 if (reg >= 4
7965                     || (prefixes & PREFIX_LOCK)
7966                     || s->aflag == MO_16
7967                     || a.base < -1) {
7968                     goto illegal_op;
7969                 }
7970                 if (a.base >= 0) {
7971                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7972                 } else {
7973                     tcg_gen_movi_tl(s->A0, 0);
7974                 }
7975                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7976                 if (a.index >= 0) {
7977                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7978                 } else {
7979                     tcg_gen_movi_tl(s->T0, 0);
7980                 }
7981                 if (CODE64(s)) {
7982                     gen_helper_bndstx64(cpu_env, s->A0, s->T0,
7983                                         cpu_bndl[reg], cpu_bndu[reg]);
7984                 } else {
7985                     gen_helper_bndstx32(cpu_env, s->A0, s->T0,
7986                                         cpu_bndl[reg], cpu_bndu[reg]);
7987                 }
7988             }
7989         }
7990         gen_nop_modrm(env, s, modrm);
7991         break;
7992     case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
7993         modrm = x86_ldub_code(env, s);
7994         gen_nop_modrm(env, s, modrm);
7995         break;
7996     case 0x120: /* mov reg, crN */
7997     case 0x122: /* mov crN, reg */
7998         if (s->cpl != 0) {
7999             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8000         } else {
8001             modrm = x86_ldub_code(env, s);
8002             /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8003              * AMD documentation (24594.pdf) and testing of
8004              * Intel 386 and 486 processors all show that the mod bits
8005              * are assumed to be 1's, regardless of actual values.
8006              */
8007             rm = (modrm & 7) | REX_B(s);
8008             reg = ((modrm >> 3) & 7) | rex_r;
8009             if (CODE64(s))
8010                 ot = MO_64;
8011             else
8012                 ot = MO_32;
8013             if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
8014                 (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
8015                 reg = 8;
8016             }
8017             switch(reg) {
8018             case 0:
8019             case 2:
8020             case 3:
8021             case 4:
8022             case 8:
8023                 gen_update_cc_op(s);
8024                 gen_jmp_im(s, pc_start - s->cs_base);
8025                 if (b & 2) {
8026                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8027                         gen_io_start();
8028                     }
8029                     gen_op_mov_v_reg(s, ot, s->T0, rm);
8030                     gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
8031                                          s->T0);
8032                     gen_jmp_im(s, s->pc - s->cs_base);
8033                     gen_eob(s);
8034                 } else {
8035                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8036                         gen_io_start();
8037                     }
8038                     gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg));
8039                     gen_op_mov_reg_v(s, ot, rm, s->T0);
8040                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8041                         gen_jmp(s, s->pc - s->cs_base);
8042                     }
8043                 }
8044                 break;
8045             default:
8046                 goto unknown_op;
8047             }
8048         }
8049         break;
8050     case 0x121: /* mov reg, drN */
8051     case 0x123: /* mov drN, reg */
8052         if (s->cpl != 0) {
8053             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8054         } else {
8055             modrm = x86_ldub_code(env, s);
8056             /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8057              * AMD documentation (24594.pdf) and testing of
8058              * Intel 386 and 486 processors all show that the mod bits
8059              * are assumed to be 1's, regardless of actual values.
8060              */
8061             rm = (modrm & 7) | REX_B(s);
8062             reg = ((modrm >> 3) & 7) | rex_r;
8063             if (CODE64(s))
8064                 ot = MO_64;
8065             else
8066                 ot = MO_32;
8067             if (reg >= 8) {
8068                 goto illegal_op;
8069             }
8070             if (b & 2) {
8071                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
8072                 gen_op_mov_v_reg(s, ot, s->T0, rm);
8073                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8074                 gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
8075                 gen_jmp_im(s, s->pc - s->cs_base);
8076                 gen_eob(s);
8077             } else {
8078                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
8079                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8080                 gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
8081                 gen_op_mov_reg_v(s, ot, rm, s->T0);
8082             }
8083         }
8084         break;
8085     case 0x106: /* clts */
8086         if (s->cpl != 0) {
8087             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8088         } else {
8089             gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
8090             gen_helper_clts(cpu_env);
8091             /* abort block because static cpu state changed */
8092             gen_jmp_im(s, s->pc - s->cs_base);
8093             gen_eob(s);
8094         }
8095         break;
8096     /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8097     case 0x1c3: /* MOVNTI reg, mem */
8098         if (!(s->cpuid_features & CPUID_SSE2))
8099             goto illegal_op;
8100         ot = mo_64_32(dflag);
8101         modrm = x86_ldub_code(env, s);
8102         mod = (modrm >> 6) & 3;
8103         if (mod == 3)
8104             goto illegal_op;
8105         reg = ((modrm >> 3) & 7) | rex_r;
8106         /* generate a generic store */
8107         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8108         break;
8109     case 0x1ae:
8110         modrm = x86_ldub_code(env, s);
8111         switch (modrm) {
8112         CASE_MODRM_MEM_OP(0): /* fxsave */
8113             if (!(s->cpuid_features & CPUID_FXSR)
8114                 || (prefixes & PREFIX_LOCK)) {
8115                 goto illegal_op;
8116             }
8117             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8118                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8119                 break;
8120             }
8121             gen_lea_modrm(env, s, modrm);
8122             gen_helper_fxsave(cpu_env, s->A0);
8123             break;
8124 
8125         CASE_MODRM_MEM_OP(1): /* fxrstor */
8126             if (!(s->cpuid_features & CPUID_FXSR)
8127                 || (prefixes & PREFIX_LOCK)) {
8128                 goto illegal_op;
8129             }
8130             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8131                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8132                 break;
8133             }
8134             gen_lea_modrm(env, s, modrm);
8135             gen_helper_fxrstor(cpu_env, s->A0);
8136             break;
8137 
8138         CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8139             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8140                 goto illegal_op;
8141             }
8142             if (s->flags & HF_TS_MASK) {
8143                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8144                 break;
8145             }
8146             gen_lea_modrm(env, s, modrm);
8147             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8148             gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8149             break;
8150 
8151         CASE_MODRM_MEM_OP(3): /* stmxcsr */
8152             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8153                 goto illegal_op;
8154             }
8155             if (s->flags & HF_TS_MASK) {
8156                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8157                 break;
8158             }
8159             gen_helper_update_mxcsr(cpu_env);
8160             gen_lea_modrm(env, s, modrm);
8161             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8162             gen_op_st_v(s, MO_32, s->T0, s->A0);
8163             break;
8164 
8165         CASE_MODRM_MEM_OP(4): /* xsave */
8166             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8167                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8168                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8169                 goto illegal_op;
8170             }
8171             gen_lea_modrm(env, s, modrm);
8172             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8173                                   cpu_regs[R_EDX]);
8174             gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8175             break;
8176 
8177         CASE_MODRM_MEM_OP(5): /* xrstor */
8178             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8179                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8180                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8181                 goto illegal_op;
8182             }
8183             gen_lea_modrm(env, s, modrm);
8184             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8185                                   cpu_regs[R_EDX]);
8186             gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8187             /* XRSTOR is how MPX is enabled, which changes how
8188                we translate.  Thus we need to end the TB.  */
8189             gen_update_cc_op(s);
8190             gen_jmp_im(s, s->pc - s->cs_base);
8191             gen_eob(s);
8192             break;
8193 
8194         CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8195             if (prefixes & PREFIX_LOCK) {
8196                 goto illegal_op;
8197             }
8198             if (prefixes & PREFIX_DATA) {
8199                 /* clwb */
8200                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8201                     goto illegal_op;
8202                 }
8203                 gen_nop_modrm(env, s, modrm);
8204             } else {
8205                 /* xsaveopt */
8206                 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8207                     || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8208                     || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8209                     goto illegal_op;
8210                 }
8211                 gen_lea_modrm(env, s, modrm);
8212                 tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8213                                       cpu_regs[R_EDX]);
8214                 gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8215             }
8216             break;
8217 
8218         CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8219             if (prefixes & PREFIX_LOCK) {
8220                 goto illegal_op;
8221             }
8222             if (prefixes & PREFIX_DATA) {
8223                 /* clflushopt */
8224                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8225                     goto illegal_op;
8226                 }
8227             } else {
8228                 /* clflush */
8229                 if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8230                     || !(s->cpuid_features & CPUID_CLFLUSH)) {
8231                     goto illegal_op;
8232                 }
8233             }
8234             gen_nop_modrm(env, s, modrm);
8235             break;
8236 
8237         case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8238         case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8239         case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8240         case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8241             if (CODE64(s)
8242                 && (prefixes & PREFIX_REPZ)
8243                 && !(prefixes & PREFIX_LOCK)
8244                 && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8245                 TCGv base, treg, src, dst;
8246 
8247                 /* Preserve hflags bits by testing CR4 at runtime.  */
8248                 tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8249                 gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
8250 
8251                 base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8252                 treg = cpu_regs[(modrm & 7) | REX_B(s)];
8253 
8254                 if (modrm & 0x10) {
8255                     /* wr*base */
8256                     dst = base, src = treg;
8257                 } else {
8258                     /* rd*base */
8259                     dst = treg, src = base;
8260                 }
8261 
8262                 if (s->dflag == MO_32) {
8263                     tcg_gen_ext32u_tl(dst, src);
8264                 } else {
8265                     tcg_gen_mov_tl(dst, src);
8266                 }
8267                 break;
8268             }
8269             goto unknown_op;
8270 
8271         case 0xf8: /* sfence / pcommit */
8272             if (prefixes & PREFIX_DATA) {
8273                 /* pcommit */
8274                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8275                     || (prefixes & PREFIX_LOCK)) {
8276                     goto illegal_op;
8277                 }
8278                 break;
8279             }
8280             /* fallthru */
8281         case 0xf9 ... 0xff: /* sfence */
8282             if (!(s->cpuid_features & CPUID_SSE)
8283                 || (prefixes & PREFIX_LOCK)) {
8284                 goto illegal_op;
8285             }
8286             tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8287             break;
8288         case 0xe8 ... 0xef: /* lfence */
8289             if (!(s->cpuid_features & CPUID_SSE)
8290                 || (prefixes & PREFIX_LOCK)) {
8291                 goto illegal_op;
8292             }
8293             tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8294             break;
8295         case 0xf0 ... 0xf7: /* mfence */
8296             if (!(s->cpuid_features & CPUID_SSE2)
8297                 || (prefixes & PREFIX_LOCK)) {
8298                 goto illegal_op;
8299             }
8300             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8301             break;
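                 /*
                  * The SSE fences map onto TCG barriers: sfence orders
                  * stores (TCG_MO_ST_ST), lfence orders loads
                  * (TCG_MO_LD_LD), and mfence orders all memory
                  * accesses (TCG_MO_ALL).
                  */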
8302 
8303         default:
8304             goto unknown_op;
8305         }
8306         break;
8307 
8308     case 0x10d: /* 3DNow! prefetch(w) */
8309         modrm = x86_ldub_code(env, s);
8310         mod = (modrm >> 6) & 3;
8311         if (mod == 3)
8312             goto illegal_op;
8313         gen_nop_modrm(env, s, modrm);
8314         break;
8315     case 0x1aa: /* rsm */
8316         gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
8317         if (!(s->flags & HF_SMM_MASK))
8318             goto illegal_op;
8319         gen_update_cc_op(s);
8320         gen_jmp_im(s, s->pc - s->cs_base);
8321         gen_helper_rsm(cpu_env);
8322         gen_eob(s);
8323         break;
8324     case 0x1b8: /* SSE4.2 popcnt */
8325         if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8326              PREFIX_REPZ)
8327             goto illegal_op;
8328         if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8329             goto illegal_op;
8330 
8331         modrm = x86_ldub_code(env, s);
8332         reg = ((modrm >> 3) & 7) | rex_r;
8333 
8334         if (s->prefix & PREFIX_DATA) {
8335             ot = MO_16;
8336         } else {
8337             ot = mo_64_32(dflag);
8338         }
8339 
8340         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8341         gen_extu(ot, s->T0);
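             /*
              * CC_OP_POPCNT computes the flags lazily from cc_src: ZF
              * is set when the source was zero and the other flags
              * read as 0.
              */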
8342         tcg_gen_mov_tl(cpu_cc_src, s->T0);
8343         tcg_gen_ctpop_tl(s->T0, s->T0);
8344         gen_op_mov_reg_v(s, ot, reg, s->T0);
8345 
8346         set_cc_op(s, CC_OP_POPCNT);
8347         break;
8348     case 0x10e ... 0x10f:
8349         /* 3DNow! instructions, ignore prefixes */
8350         s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8351         /* fall through */
8352     case 0x110 ... 0x117:
8353     case 0x128 ... 0x12f:
8354     case 0x138 ... 0x13a:
8355     case 0x150 ... 0x179:
8356     case 0x17c ... 0x17f:
8357     case 0x1c2:
8358     case 0x1c4 ... 0x1c6:
8359     case 0x1d0 ... 0x1fe:
8360         gen_sse(env, s, b, pc_start, rex_r);
8361         break;
8362     default:
8363         goto unknown_op;
8364     }
8365     return s->pc;
8366  illegal_op:
8367     gen_illegal_opcode(s);
8368     return s->pc;
8369  unknown_op:
8370     gen_unknown_opcode(env, s);
8371     return s->pc;
8372 }
8373 
8374 void tcg_x86_init(void)
8375 {
8376     static const char reg_names[CPU_NB_REGS][4] = {
8377 #ifdef TARGET_X86_64
8378         [R_EAX] = "rax",
8379         [R_EBX] = "rbx",
8380         [R_ECX] = "rcx",
8381         [R_EDX] = "rdx",
8382         [R_ESI] = "rsi",
8383         [R_EDI] = "rdi",
8384         [R_EBP] = "rbp",
8385         [R_ESP] = "rsp",
8386         [8]  = "r8",
8387         [9]  = "r9",
8388         [10] = "r10",
8389         [11] = "r11",
8390         [12] = "r12",
8391         [13] = "r13",
8392         [14] = "r14",
8393         [15] = "r15",
8394 #else
8395         [R_EAX] = "eax",
8396         [R_EBX] = "ebx",
8397         [R_ECX] = "ecx",
8398         [R_EDX] = "edx",
8399         [R_ESI] = "esi",
8400         [R_EDI] = "edi",
8401         [R_EBP] = "ebp",
8402         [R_ESP] = "esp",
8403 #endif
8404     };
8405     static const char seg_base_names[6][8] = {
8406         [R_CS] = "cs_base",
8407         [R_DS] = "ds_base",
8408         [R_ES] = "es_base",
8409         [R_FS] = "fs_base",
8410         [R_GS] = "gs_base",
8411         [R_SS] = "ss_base",
8412     };
8413     static const char bnd_regl_names[4][8] = {
8414         "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8415     };
8416     static const char bnd_regu_names[4][8] = {
8417         "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8418     };
8419     int i;
8420 
8421     cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8422                                        offsetof(CPUX86State, cc_op), "cc_op");
8423     cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8424                                     "cc_dst");
8425     cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8426                                     "cc_src");
8427     cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8428                                      "cc_src2");
8429 
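         /*
          * Each TCG global created here aliases a field of CPUX86State,
          * so generated code reads and writes the architectural state
          * in place.
          */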
8430     for (i = 0; i < CPU_NB_REGS; ++i) {
8431         cpu_regs[i] = tcg_global_mem_new(cpu_env,
8432                                          offsetof(CPUX86State, regs[i]),
8433                                          reg_names[i]);
8434     }
8435 
8436     for (i = 0; i < 6; ++i) {
8437         cpu_seg_base[i]
8438             = tcg_global_mem_new(cpu_env,
8439                                  offsetof(CPUX86State, segs[i].base),
8440                                  seg_base_names[i]);
8441     }
8442 
8443     for (i = 0; i < 4; ++i) {
8444         cpu_bndl[i]
8445             = tcg_global_mem_new_i64(cpu_env,
8446                                      offsetof(CPUX86State, bnd_regs[i].lb),
8447                                      bnd_regl_names[i]);
8448         cpu_bndu[i]
8449             = tcg_global_mem_new_i64(cpu_env,
8450                                      offsetof(CPUX86State, bnd_regs[i].ub),
8451                                      bnd_regu_names[i]);
8452     }
8453 }
8454 
8455 static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
8456 {
8457     DisasContext *dc = container_of(dcbase, DisasContext, base);
8458     CPUX86State *env = cpu->env_ptr;
8459     uint32_t flags = dc->base.tb->flags;
8460     target_ulong cs_base = dc->base.tb->cs_base;
8461 
8462     dc->pe = (flags >> HF_PE_SHIFT) & 1;
8463     dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
8464     dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
8465     dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
8466     dc->f_st = 0;
8467     dc->vm86 = (flags >> VM_SHIFT) & 1;
8468     dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
8469     dc->iopl = (flags >> IOPL_SHIFT) & 3;
8470     dc->tf = (flags >> TF_SHIFT) & 1;
8471     dc->cc_op = CC_OP_DYNAMIC;
8472     dc->cc_op_dirty = false;
8473     dc->cs_base = cs_base;
8474     dc->popl_esp_hack = 0;
8475     /* select memory access functions */
8476     dc->mem_index = 0;
8477 #ifdef CONFIG_SOFTMMU
8478     dc->mem_index = cpu_mmu_index(env, false);
8479 #endif
8480     dc->cpuid_features = env->features[FEAT_1_EDX];
8481     dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8482     dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8483     dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8484     dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8485     dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8486 #ifdef TARGET_X86_64
8487     dc->lma = (flags >> HF_LMA_SHIFT) & 1;
8488     dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
8489 #endif
8490     dc->flags = flags;
8491     dc->jmp_opt = !(dc->tf || dc->base.singlestep_enabled ||
8492                     (flags & HF_INHIBIT_IRQ_MASK));
8493     /* Do not optimize repz jumps at all in icount mode, because
8494        rep movsS instructions are executed with different paths
8495        in the !repz_opt and repz_opt modes. The first one was
8496        always used except in single step mode. This setting
8497        disables the jump optimization so that the control paths
8498        become equivalent in run and single step modes.
8499        Now there will be no jump optimization for repz in
8500        record/replay modes and there will always be an
8501        additional step for ecx=0 when icount is enabled.
8502      */
8503     dc->repz_opt = !dc->jmp_opt && !(tb_cflags(dc->base.tb) & CF_USE_ICOUNT);
8504 #if 0
8505     /* check addseg logic */
8506     if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
8507         printf("ERROR addseg\n");
8508 #endif
8509 
8510     dc->T0 = tcg_temp_new();
8511     dc->T1 = tcg_temp_new();
8512     dc->A0 = tcg_temp_new();
8513 
8514     dc->tmp0 = tcg_temp_new();
8515     dc->tmp1_i64 = tcg_temp_new_i64();
8516     dc->tmp2_i32 = tcg_temp_new_i32();
8517     dc->tmp3_i32 = tcg_temp_new_i32();
8518     dc->tmp4 = tcg_temp_new();
8519     dc->ptr0 = tcg_temp_new_ptr();
8520     dc->ptr1 = tcg_temp_new_ptr();
8521     dc->cc_srcT = tcg_temp_local_new();
8522 }
8523 
8524 static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
8525 {
8526 }
8527 
8528 static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
8529 {
8530     DisasContext *dc = container_of(dcbase, DisasContext, base);
8531 
8532     tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
8533 }
8534 
8535 static bool i386_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
8536                                      const CPUBreakpoint *bp)
8537 {
8538     DisasContext *dc = container_of(dcbase, DisasContext, base);
8539     /* If RF is set, suppress an internally generated breakpoint.  */
8540     int flags = dc->base.tb->flags & HF_RF_MASK ? BP_GDB : BP_ANY;
8541     if (bp->flags & flags) {
8542         gen_debug(dc, dc->base.pc_next - dc->cs_base);
8543         dc->base.is_jmp = DISAS_NORETURN;
8544         /* The address covered by the breakpoint must be included in
8545            [tb->pc, tb->pc + tb->size) in order for it to be
8546            properly cleared -- thus we increment the PC here so that
8547            the generic logic setting tb->size later does the right thing.  */
8548         dc->base.pc_next += 1;
8549         return true;
8550     } else {
8551         return false;
8552     }
8553 }
8554 
8555 static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8556 {
8557     DisasContext *dc = container_of(dcbase, DisasContext, base);
8558     target_ulong pc_next;
8559 
8560 #ifdef TARGET_VSYSCALL_PAGE
8561     /*
8562      * Detect entry into the vsyscall page and invoke the syscall.
8563      */
8564     if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) {
8565         gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next);
8566         return;
8567     }
8568 #endif
8569 
8570     pc_next = disas_insn(dc, cpu);
8571 
8572     if (dc->tf || (dc->base.tb->flags & HF_INHIBIT_IRQ_MASK)) {
8573         /* In single step mode, we generate only one instruction and
8574            then raise an exception. */
8575         /* If IRQs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
8576            the flag and abort the translation to give the IRQs a
8577            chance to happen. */
8578         dc->base.is_jmp = DISAS_TOO_MANY;
8579     } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
8580                && ((pc_next & TARGET_PAGE_MASK)
8581                    != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
8582                        & TARGET_PAGE_MASK)
8583                    || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
8584         /* Do not cross a page boundary in icount mode, since that
8585            can raise an exception. Stop only when the boundary would
8586            be crossed by the first instruction in the block.
8587            If the current instruction has already crossed the boundary,
8588            that is fine, because an exception has not stopped this code.
8589          */
8590         dc->base.is_jmp = DISAS_TOO_MANY;
8591     } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
8592         dc->base.is_jmp = DISAS_TOO_MANY;
8593     }
8594 
8595     dc->base.pc_next = pc_next;
8596 }
8597 
8598 static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8599 {
8600     DisasContext *dc = container_of(dcbase, DisasContext, base);
8601 
8602     if (dc->base.is_jmp == DISAS_TOO_MANY) {
8603         gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
8604         gen_eob(dc);
8605     }
8606 }
8607 
8608 static void i386_tr_disas_log(const DisasContextBase *dcbase,
8609                               CPUState *cpu)
8610 {
8611     DisasContext *dc = container_of(dcbase, DisasContext, base);
8612 
8613     qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
8614     log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
8615 }
8616 
8617 static const TranslatorOps i386_tr_ops = {
8618     .init_disas_context = i386_tr_init_disas_context,
8619     .tb_start           = i386_tr_tb_start,
8620     .insn_start         = i386_tr_insn_start,
8621     .breakpoint_check   = i386_tr_breakpoint_check,
8622     .translate_insn     = i386_tr_translate_insn,
8623     .tb_stop            = i386_tr_tb_stop,
8624     .disas_log          = i386_tr_disas_log,
8625 };
8626 
8627 /* generate intermediate code for basic block 'tb'.  */
8628 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
8629 {
8630     DisasContext dc;
8631 
8632     translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
8633 }
8634 
8635 void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8636                           target_ulong *data)
8637 {
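     /*
      * data[] holds the values recorded by tcg_gen_insn_start():
      * data[0] is the pc (eip + cs_base), data[1] the CC_OP value.
      */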
8638     int cc_op = data[1];
8639     env->eip = data[0] - tb->cs_base;
8640     if (cc_op != CC_OP_DYNAMIC) {
8641         env->cc_op = cc_op;
8642     }
8643 }
8644