1 // AsmJit - Machine code generation for C++
2 //
3 //  * Official AsmJit Home Page: https://asmjit.com
4 //  * Official Github Repository: https://github.com/asmjit/asmjit
5 //
6 // Copyright (c) 2008-2020 The AsmJit Authors
7 //
8 // This software is provided 'as-is', without any express or implied
9 // warranty. In no event will the authors be held liable for any damages
10 // arising from the use of this software.
11 //
12 // Permission is granted to anyone to use this software for any purpose,
13 // including commercial applications, and to alter it and redistribute it
14 // freely, subject to the following restrictions:
15 //
16 // 1. The origin of this software must not be misrepresented; you must not
17 //    claim that you wrote the original software. If you use this software
18 //    in a product, an acknowledgment in the product documentation would be
19 //    appreciated but is not required.
20 // 2. Altered source versions must be plainly marked as such, and must not be
21 //    misrepresented as being the original software.
22 // 3. This notice may not be removed or altered from any source distribution.
23 
24 #include "../core/api-build_p.h"
25 #ifdef ASMJIT_BUILD_X86
26 
27 #include "../core/assembler.h"
28 #include "../core/codewriter_p.h"
29 #include "../core/cpuinfo.h"
30 #include "../core/emitterutils_p.h"
31 #include "../core/formatter.h"
32 #include "../core/logger.h"
33 #include "../core/misc_p.h"
34 #include "../core/support.h"
35 #include "../x86/x86assembler.h"
36 #include "../x86/x86instdb_p.h"
37 #include "../x86/x86formatter_p.h"
38 #include "../x86/x86opcode_p.h"
39 
40 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
41 
42 // ============================================================================
43 // [TypeDefs]
44 // ============================================================================
45 
46 typedef Support::FastUInt8 FastUInt8;
47 
48 // ============================================================================
49 // [Constants]
50 // ============================================================================
51 
52 //! X86 bytes used to encode important prefixes.
53 enum X86Byte : uint32_t {
54   //! 1-byte REX prefix mask.
55   kX86ByteRex = 0x40,
56 
57   //! 1-byte REX.W component.
58   kX86ByteRexW = 0x08,
59 
  //! Invalid REX prefix marker (0x80), used to detect illegal REX usage in 32-bit mode.
  kX86ByteInvalidRex = 0x80,
61 
62   //! 2-byte VEX prefix:
63   //!   - `[0]` - `0xC5`.
64   //!   - `[1]` - `RvvvvLpp`.
65   kX86ByteVex2 = 0xC5,
66 
67   //! 3-byte VEX prefix:
68   //!   - `[0]` - `0xC4`.
69   //!   - `[1]` - `RXBmmmmm`.
70   //!   - `[2]` - `WvvvvLpp`.
71   kX86ByteVex3 = 0xC4,
72 
73   //! 3-byte XOP prefix:
74   //!   - `[0]` - `0x8F`.
75   //!   - `[1]` - `RXBmmmmm`.
76   //!   - `[2]` - `WvvvvLpp`.
77   kX86ByteXop3 = 0x8F,
78 
79   //! 4-byte EVEX prefix:
80   //!   - `[0]` - `0x62`.
81   //!   - `[1]` - Payload0 or `P[ 7: 0]` - `[R  X  B  R' 0  0  m  m]`.
82   //!   - `[2]` - Payload1 or `P[15: 8]` - `[W  v  v  v  v  1  p  p]`.
83   //!   - `[3]` - Payload2 or `P[23:16]` - `[z  L' L  b  V' a  a  a]`.
84   //!
85   //! Payload:
86   //!   - `P[ 1: 0]` - OPCODE: EVEX.mmmmm, only lowest 2 bits [1:0] used.
87   //!   - `P[ 3: 2]` - ______: Must be 0.
88   //!   - `P[    4]` - REG-ID: EVEX.R' - 5th bit of 'RRRRR'.
89   //!   - `P[    5]` - REG-ID: EVEX.B  - 4th bit of 'BBBBB'.
90   //!   - `P[    6]` - REG-ID: EVEX.X  - 5th bit of 'BBBBB' or 4th bit of 'XXXX' (with SIB).
91   //!   - `P[    7]` - REG-ID: EVEX.R  - 4th bit of 'RRRRR'.
92   //!   - `P[ 9: 8]` - OPCODE: EVEX.pp.
93   //!   - `P[   10]` - ______: Must be 1.
94   //!   - `P[14:11]` - REG-ID: 4 bits of 'VVVV'.
95   //!   - `P[   15]` - OPCODE: EVEX.W.
96   //!   - `P[18:16]` - REG-ID: K register k0...k7 (Merging/Zeroing Vector Ops).
97   //!   - `P[   19]` - REG-ID: 5th bit of 'VVVVV'.
98   //!   - `P[   20]` - OPCODE: Broadcast/Rounding Control/SAE bit.
  //!   - `P[22:21]` - OPCODE: Vector Length (L' and L) / Rounding Control.
100   //!   - `P[   23]` - OPCODE: Zeroing/Merging.
101   kX86ByteEvex = 0x62
102 };
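// A worked example of the 2-byte VEX layout documented above (an illustrative
// sketch, not part of the original code): `vaddps xmm1, xmm2, xmm3` is encoded
// as C5 E8 58 CB - 0xC5 selects the 2-byte VEX form and 0xE8 packs R=1
// (inverted), vvvv=1101 (inverted id of xmm2), L=0 (128-bit), and pp=00.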
103 
104 // AsmJit specific (used to encode VVVVV field in XOP/VEX/EVEX).
105 enum VexVVVVV : uint32_t {
106   kVexVVVVVShift = 7,
107   kVexVVVVVMask = 0x1F << kVexVVVVVShift
108 };
109 
110 //! Instruction 2-byte/3-byte opcode prefix definition.
111 struct X86OpcodeMM {
112   uint8_t size;
113   uint8_t data[3];
114 };
115 
116 //! Mandatory prefixes used to encode legacy [66, F3, F2] or [9B] byte.
117 static const uint8_t x86OpcodePP[8] = { 0x00, 0x66, 0xF3, 0xF2, 0x00, 0x00, 0x00, 0x9B };
118 
119 //! Instruction 2-byte/3-byte opcode prefix data.
120 static const X86OpcodeMM x86OpcodeMM[] = {
121   { 0, { 0x00, 0x00, 0 } }, // #00 (0b0000).
122   { 1, { 0x0F, 0x00, 0 } }, // #01 (0b0001).
123   { 2, { 0x0F, 0x38, 0 } }, // #02 (0b0010).
124   { 2, { 0x0F, 0x3A, 0 } }, // #03 (0b0011).
125   { 2, { 0x0F, 0x01, 0 } }, // #04 (0b0100).
126   { 0, { 0x00, 0x00, 0 } }, // #05 (0b0101).
127   { 0, { 0x00, 0x00, 0 } }, // #06 (0b0110).
128   { 0, { 0x00, 0x00, 0 } }, // #07 (0b0111).
129   { 0, { 0x00, 0x00, 0 } }, // #08 (0b1000).
130   { 0, { 0x00, 0x00, 0 } }, // #09 (0b1001).
131   { 0, { 0x00, 0x00, 0 } }, // #0A (0b1010).
132   { 0, { 0x00, 0x00, 0 } }, // #0B (0b1011).
133   { 0, { 0x00, 0x00, 0 } }, // #0C (0b1100).
134   { 0, { 0x00, 0x00, 0 } }, // #0D (0b1101).
135   { 0, { 0x00, 0x00, 0 } }, // #0E (0b1110).
136   { 0, { 0x00, 0x00, 0 } }  // #0F (0b1111).
137 };
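// Illustrative example: an opcode whose MM field selects row #02 above gets the
// escape bytes 0x0F 0x38 emitted before its final opcode byte, which is how
// instructions from the 0F38 map (e.g. SSSE3's PSHUFB) are produced.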
138 
139 static const uint8_t x86SegmentPrefix[8] = {
140   0x00, // None.
141   0x26, // ES.
142   0x2E, // CS.
143   0x36, // SS.
144   0x3E, // DS.
145   0x64, // FS.
146   0x65  // GS.
147 };
148 
149 static const uint32_t x86OpcodePushSReg[8] = {
150   Opcode::k000000 | 0x00, // None.
151   Opcode::k000000 | 0x06, // Push ES.
152   Opcode::k000000 | 0x0E, // Push CS.
153   Opcode::k000000 | 0x16, // Push SS.
154   Opcode::k000000 | 0x1E, // Push DS.
155   Opcode::k000F00 | 0xA0, // Push FS.
156   Opcode::k000F00 | 0xA8  // Push GS.
157 };
158 
159 static const uint32_t x86OpcodePopSReg[8]  = {
160   Opcode::k000000 | 0x00, // None.
161   Opcode::k000000 | 0x07, // Pop ES.
  Opcode::k000000 | 0x00, // Pop CS (not encodable).
163   Opcode::k000000 | 0x17, // Pop SS.
164   Opcode::k000000 | 0x1F, // Pop DS.
165   Opcode::k000F00 | 0xA1, // Pop FS.
166   Opcode::k000F00 | 0xA9  // Pop GS.
167 };
168 
169 // ============================================================================
170 // [asmjit::X86MemInfo | X86VEXPrefix | X86LLByRegType | X86CDisp8Table]
171 // ============================================================================
172 
173 //! Memory operand's info bits.
174 //!
//! A lookup table that contains various information based on the BASE and INDEX
//! information of a memory operand. This is safer than scattering IFs through
//! the code and also allows errors to be checked much faster.
178 enum X86MemInfo_Enum {
179   kX86MemInfo_0         = 0x00,
180 
181   kX86MemInfo_BaseGp    = 0x01, //!< Has BASE reg, REX.B can be 1, compatible with REX.B byte.
182   kX86MemInfo_Index     = 0x02, //!< Has INDEX reg, REX.X can be 1, compatible with REX.X byte.
183 
184   kX86MemInfo_BaseLabel = 0x10, //!< Base is Label.
185   kX86MemInfo_BaseRip   = 0x20, //!< Base is RIP.
186 
187   kX86MemInfo_67H_X86   = 0x40, //!< Address-size override in 32-bit mode.
188   kX86MemInfo_67H_X64   = 0x80, //!< Address-size override in 64-bit mode.
189   kX86MemInfo_67H_Mask  = 0xC0  //!< Contains all address-size override bits.
190 };
191 
192 template<uint32_t X>
193 struct X86MemInfo_T {
194   enum {
195     B = (X     ) & 0x1F,
196     I = (X >> 5) & 0x1F,
197 
198     kBase  = (B >= Reg::kTypeGpw    && B <= Reg::kTypeGpq ) ? kX86MemInfo_BaseGp    :
199              (B == Reg::kTypeRip                          ) ? kX86MemInfo_BaseRip   :
200              (B == Label::kLabelTag                       ) ? kX86MemInfo_BaseLabel : 0,
201 
202     kIndex = (I >= Reg::kTypeGpw    && I <= Reg::kTypeGpq ) ? kX86MemInfo_Index     :
203              (I >= Reg::kTypeXmm    && I <= Reg::kTypeZmm ) ? kX86MemInfo_Index     : 0,
204 
205     k67H   = (B == Reg::kTypeGpw    && I == Reg::kTypeNone) ? kX86MemInfo_67H_X86   :
206              (B == Reg::kTypeGpd    && I == Reg::kTypeNone) ? kX86MemInfo_67H_X64   :
207              (B == Reg::kTypeNone   && I == Reg::kTypeGpw ) ? kX86MemInfo_67H_X86   :
208              (B == Reg::kTypeNone   && I == Reg::kTypeGpd ) ? kX86MemInfo_67H_X64   :
209              (B == Reg::kTypeGpw    && I == Reg::kTypeGpw ) ? kX86MemInfo_67H_X86   :
210              (B == Reg::kTypeGpd    && I == Reg::kTypeGpd ) ? kX86MemInfo_67H_X64   :
211              (B == Reg::kTypeGpw    && I == Reg::kTypeXmm ) ? kX86MemInfo_67H_X86   :
212              (B == Reg::kTypeGpd    && I == Reg::kTypeXmm ) ? kX86MemInfo_67H_X64   :
213              (B == Reg::kTypeGpw    && I == Reg::kTypeYmm ) ? kX86MemInfo_67H_X86   :
214              (B == Reg::kTypeGpd    && I == Reg::kTypeYmm ) ? kX86MemInfo_67H_X64   :
215              (B == Reg::kTypeGpw    && I == Reg::kTypeZmm ) ? kX86MemInfo_67H_X86   :
216              (B == Reg::kTypeGpd    && I == Reg::kTypeZmm ) ? kX86MemInfo_67H_X64   :
217              (B == Label::kLabelTag && I == Reg::kTypeGpw ) ? kX86MemInfo_67H_X86   :
218              (B == Label::kLabelTag && I == Reg::kTypeGpd ) ? kX86MemInfo_67H_X64   : 0,
219 
220     kValue = kBase | kIndex | k67H | 0x04 | 0x08
221   };
222 };
223 
224 // The result stored in the LUT is a combination of
225 //   - 67H - Address override prefix - depends on BASE+INDEX register types and
226 //           the target architecture.
227 //   - REX - A possible combination of REX.[B|X|R|W] bits in REX prefix where
228 //           REX.B and REX.X are possibly masked out, but REX.R and REX.W are
229 //           kept as is.
230 #define VALUE(x) X86MemInfo_T<x>::kValue
231 static const uint8_t x86MemInfo[] = { ASMJIT_LOOKUP_TABLE_1024(VALUE, 0) };
232 #undef VALUE
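// Illustrative example (a sketch of how the LUT is consumed): for an operand
// like `[eax]` the BASE type is Gpd with no INDEX, so the entry has the
// kX86MemInfo_67H_X64 bit set - the 0x67 address-size override is then emitted
// only when the emitter's address-override mask selects that bit (64-bit mode).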
233 
// VEX3 or XOP xor bits applied to the opcode before it's emitted. The index into
// this table is the 'mmmmm' value, which contains all we need. This is only used
// by 3-byte VEX and XOP prefixes; the 2-byte VEX prefix is handled differently.
// The idea is to minimize the difference between VEX3 and XOP when encoding a
// VEX or XOP instruction. This should minimize the code required to emit such
// instructions and should also make it faster as we don't need any branch to
// decide between VEX3 and XOP.
241 //            ____    ___
242 // [_OPCODE_|WvvvvLpp|RXBmmmmm|VEX3_XOP]
243 #define VALUE(x) ((x & 0x08) ? kX86ByteXop3 : kX86ByteVex3) | (0xF << 19) | (0x7 << 13)
244 static const uint32_t x86VEXPrefix[] = { ASMJIT_LOOKUP_TABLE_16(VALUE, 0) };
245 #undef VALUE
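// Illustrative example: bit 3 of 'mmmmm' distinguishes the two escapes, so map
// values 0x01..0x03 (0F/0F38/0F3A) select the 0xC4 VEX3 byte while the XOP maps
// (0x08 and up) select 0x8F, with no branch needed at emit time.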
246 
247 // Table that contains LL opcode field addressed by a register size / 16. It's
248 // used to propagate L.256 or L.512 when YMM or ZMM registers are used,
249 // respectively.
250 #define VALUE(x) (x & (64 >> 4)) ? Opcode::kLL_2 : \
251                  (x & (32 >> 4)) ? Opcode::kLL_1 : Opcode::kLL_0
252 static const uint32_t x86LLBySizeDiv16[] = { ASMJIT_LOOKUP_TABLE_16(VALUE, 0) };
253 #undef VALUE
254 
// Table that contains LL opcode field addressed by a register type. It's used
// to propagate L.256 or L.512 when YMM or ZMM registers are used, respectively.
258 #define VALUE(x) x == Reg::kTypeZmm ? Opcode::kLL_2 : \
259                  x == Reg::kTypeYmm ? Opcode::kLL_1 : Opcode::kLL_0
260 static const uint32_t x86LLByRegType[] = { ASMJIT_LOOKUP_TABLE_16(VALUE, 0) };
261 #undef VALUE
262 
263 // Table that contains a scale (shift left) based on 'TTWLL' field and
264 // the instruction's tuple-type (TT) field. The scale is then applied to
265 // the BASE-N stored in each opcode to calculate the final compressed
266 // displacement used by all EVEX encoded instructions.
267 template<uint32_t X>
268 struct X86CDisp8SHL_T {
269   enum {
270     TT = (X >> 3) << Opcode::kCDTT_Shift,
271     LL = (X >> 0) & 0x3,
272     W  = (X >> 2) & 0x1,
273 
274     kValue = (TT == Opcode::kCDTT_None ? ((LL==0) ? 0 : (LL==1) ? 0   : 0  ) :
275               TT == Opcode::kCDTT_ByLL ? ((LL==0) ? 0 : (LL==1) ? 1   : 2  ) :
276               TT == Opcode::kCDTT_T1W  ? ((LL==0) ? W : (LL==1) ? 1+W : 2+W) :
277               TT == Opcode::kCDTT_DUP  ? ((LL==0) ? 0 : (LL==1) ? 2   : 3  ) : 0) << Opcode::kCDSHL_Shift
278   };
279 };
280 
281 #define VALUE(x) X86CDisp8SHL_T<x>::kValue
282 static const uint32_t x86CDisp8SHL[] = { ASMJIT_LOOKUP_TABLE_32(VALUE, 0) };
283 #undef VALUE
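// A worked compressed-displacement example (a sketch based on the table above):
// a ByLL tuple with LL=2 (ZMM) yields a shift of 2, so an opcode whose base
// displacement unit is 16 bytes effectively uses a 64-byte unit - a displacement
// of 192 can then be encoded as cdisp8 == 3 instead of requiring a full disp32.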
284 
285 // Table that contains MOD byte of a 16-bit [BASE + disp] address.
286 //   0xFF == Invalid.
287 static const uint8_t x86Mod16BaseTable[8] = {
288   0xFF, // AX -> N/A.
289   0xFF, // CX -> N/A.
290   0xFF, // DX -> N/A.
291   0x07, // BX -> 111.
292   0xFF, // SP -> N/A.
293   0x06, // BP -> 110.
294   0x04, // SI -> 100.
295   0x05  // DI -> 101.
296 };
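// Illustrative example: a 16-bit address such as [bx + disp] uses R/M 0b111
// from this table, while [ax + disp] maps to 0xFF and is rejected as an invalid
// 16-bit addressing form.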
297 
298 // Table that contains MOD byte of a 16-bit [BASE + INDEX + disp] combination.
299 //   0xFF == Invalid.
300 template<uint32_t X>
301 struct X86Mod16BaseIndexTable_T {
302   enum {
303     B = X >> 3,
304     I = X & 0x7,
305 
306     kValue = ((B == Gp::kIdBx && I == Gp::kIdSi) || (B == Gp::kIdSi && I == Gp::kIdBx)) ? 0x00 :
307              ((B == Gp::kIdBx && I == Gp::kIdDi) || (B == Gp::kIdDi && I == Gp::kIdBx)) ? 0x01 :
308              ((B == Gp::kIdBp && I == Gp::kIdSi) || (B == Gp::kIdSi && I == Gp::kIdBp)) ? 0x02 :
309              ((B == Gp::kIdBp && I == Gp::kIdDi) || (B == Gp::kIdDi && I == Gp::kIdBp)) ? 0x03 : 0xFF
310   };
311 };
312 
313 #define VALUE(x) X86Mod16BaseIndexTable_T<x>::kValue
314 static const uint8_t x86Mod16BaseIndexTable[] = { ASMJIT_LOOKUP_TABLE_64(VALUE, 0) };
315 #undef VALUE
316 
317 // ============================================================================
318 // [asmjit::x86::Assembler - Helpers]
319 // ============================================================================
320 
static ASMJIT_INLINE bool x86IsJmpOrCall(uint32_t instId) noexcept {
322   return instId == Inst::kIdJmp || instId == Inst::kIdCall;
323 }
324 
static ASMJIT_INLINE bool x86IsImplicitMem(const Operand_& op, uint32_t base) noexcept {
326   return op.isMem() && op.as<Mem>().baseId() == base && !op.as<Mem>().hasOffset();
327 }
328 
329 //! Combine `regId` and `vvvvvId` into a single value (used by AVX and AVX-512).
static ASMJIT_INLINE uint32_t x86PackRegAndVvvvv(uint32_t regId, uint32_t vvvvvId) noexcept {
331   return regId + (vvvvvId << kVexVVVVVShift);
332 }
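// Illustrative example: x86PackRegAndVvvvv(1, 2) == 0x101 - the low 7 bits carry
// the ModRM register id and bits [11:7] carry the VEX/EVEX 'vvvvv' id, matching
// kVexVVVVVShift and kVexVVVVVMask above.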
333 
static ASMJIT_INLINE uint32_t x86OpcodeLByVMem(const Operand_& op) noexcept {
335   return x86LLByRegType[op.as<Mem>().indexType()];
336 }
337 
static ASMJIT_INLINE uint32_t x86OpcodeLBySize(uint32_t size) noexcept {
339   return x86LLBySizeDiv16[size / 16];
340 }
341 
342 //! Encode MOD byte.
static ASMJIT_INLINE uint32_t x86EncodeMod(uint32_t m, uint32_t o, uint32_t rm) noexcept {
344   ASMJIT_ASSERT(m <= 3);
345   ASMJIT_ASSERT(o <= 7);
346   ASMJIT_ASSERT(rm <= 7);
347   return (m << 6) + (o << 3) + rm;
348 }
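// For example (illustrative): x86EncodeMod(3, 2, 1) == 0xD1, i.e. MOD=11
// (register-direct), opcode extension /2 in the 'reg' field, and register id 1
// in the 'rm' field.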
349 
350 //! Encode SIB byte.
static ASMJIT_INLINE uint32_t x86EncodeSib(uint32_t s, uint32_t i, uint32_t b) noexcept {
352   ASMJIT_ASSERT(s <= 3);
353   ASMJIT_ASSERT(i <= 7);
354   ASMJIT_ASSERT(b <= 7);
355   return (s << 6) + (i << 3) + b;
356 }
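// For example (illustrative): x86EncodeSib(2, 1, 5) == 0x8D - a SIB byte with
// scale=4 (2^2), index register id 1, and base register id 5.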
357 
static ASMJIT_INLINE bool x86IsRexInvalid(uint32_t rex) noexcept {
359   // Validates the following possibilities:
360   //   REX == 0x00      -> OKAY (X86_32 / X86_64).
361   //   REX == 0x40-0x4F -> OKAY (X86_64).
362   //   REX == 0x80      -> OKAY (X86_32 mode, rex prefix not used).
363   //   REX == 0x81-0xCF -> BAD  (X86_32 mode, rex prefix used).
364   return rex > kX86ByteInvalidRex;
365 }
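// Illustrative example: x86IsRexInvalid(kX86ByteInvalidRex | kX86ByteRex) is
// true - once the invalid-REX marker (0x80) is combined with any REX bit the
// value exceeds 0x80, which is how a REX prefix gets rejected in 32-bit mode.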
366 
367 template<typename T>
static constexpr T x86SignExtendI32(T imm) noexcept { return T(int64_t(int32_t(imm & T(0xFFFFFFFF)))); }
369 
static ASMJIT_INLINE uint32_t x86AltOpcodeOf(const InstDB::InstInfo* info) noexcept {
371   return InstDB::_altOpcodeTable[info->_altOpcodeIndex];
372 }
373 
374 // ============================================================================
375 // [asmjit::X86BufferWriter]
376 // ============================================================================
377 
378 class X86BufferWriter : public CodeWriter {
379 public:
  ASMJIT_INLINE explicit X86BufferWriter(Assembler* a) noexcept
381     : CodeWriter(a) {}
382 
  ASMJIT_INLINE void emitPP(uint32_t opcode) noexcept {
384     uint32_t ppIndex = (opcode              >> Opcode::kPP_Shift) &
385                        (Opcode::kPP_FPUMask >> Opcode::kPP_Shift) ;
386     emit8If(x86OpcodePP[ppIndex], ppIndex != 0);
387   }
388 
  ASMJIT_INLINE void emitMMAndOpcode(uint32_t opcode) noexcept {
390     uint32_t mmIndex = (opcode & Opcode::kMM_Mask) >> Opcode::kMM_Shift;
391     const X86OpcodeMM& mmCode = x86OpcodeMM[mmIndex];
392 
393     emit8If(mmCode.data[0], mmCode.size > 0);
394     emit8If(mmCode.data[1], mmCode.size > 1);
395     emit8(opcode);
396   }
397 
  ASMJIT_INLINE void emitSegmentOverride(uint32_t segmentId) noexcept {
399     ASMJIT_ASSERT(segmentId < ASMJIT_ARRAY_SIZE(x86SegmentPrefix));
400 
401     FastUInt8 prefix = x86SegmentPrefix[segmentId];
402     emit8If(prefix, prefix != 0);
403   }
404 
405   template<typename CondT>
  ASMJIT_INLINE void emitAddressOverride(CondT condition) noexcept {
407     emit8If(0x67, condition);
408   }
409 
  ASMJIT_INLINE void emitImmByteOrDWord(uint64_t immValue, FastUInt8 immSize) noexcept {
411     if (!immSize)
412       return;
413 
414     ASMJIT_ASSERT(immSize == 1 || immSize == 4);
415 
416 #if ASMJIT_ARCH_BITS >= 64
417     uint64_t imm = uint64_t(immValue);
418 #else
419     uint32_t imm = uint32_t(immValue & 0xFFFFFFFFu);
420 #endif
421 
422     // Many instructions just use a single byte immediate, so make it fast.
423     emit8(imm & 0xFFu);
424     if (immSize == 1) return;
425 
426     imm >>= 8;
427     emit8(imm & 0xFFu);
428     imm >>= 8;
429     emit8(imm & 0xFFu);
430     imm >>= 8;
431     emit8(imm & 0xFFu);
432   }
433 
  ASMJIT_INLINE void emitImmediate(uint64_t immValue, FastUInt8 immSize) noexcept {
435 #if ASMJIT_ARCH_BITS >= 64
436     uint64_t imm = immValue;
437     if (immSize >= 4) {
438       emit32uLE(imm & 0xFFFFFFFFu);
439       imm >>= 32;
440       immSize = FastUInt8(immSize - 4u);
441     }
442 #else
443     uint32_t imm = uint32_t(immValue & 0xFFFFFFFFu);
444     if (immSize >= 4) {
445       emit32uLE(imm);
446       imm = uint32_t(immValue >> 32);
447       immSize = FastUInt8(immSize - 4u);
448     }
449 #endif
450 
451     if (!immSize)
452       return;
453     emit8(imm & 0xFFu);
454     imm >>= 8;
455 
456     if (--immSize == 0)
457       return;
458     emit8(imm & 0xFFu);
459     imm >>= 8;
460 
461     if (--immSize == 0)
462       return;
463     emit8(imm & 0xFFu);
464     imm >>= 8;
465 
466     if (--immSize == 0)
467       return;
468     emit8(imm & 0xFFu);
469   }
470 };
471 
// If the operand is BPL|SPL|SIL|DIL|R8B-R15B:
//   - Force REX prefix.
// If the operand is AH|BH|CH|DH:
//   - Patch its index from 0..3 to 4..7 as encoded by X86.
//   - Disallow REX prefix.
477 #define FIXUP_GPB(REG_OP, REG_ID)                           \
478   do {                                                      \
479     if (!static_cast<const Gp&>(REG_OP).isGpbHi()) {        \
480       options |= (REG_ID >= 4) ? uint32_t(Inst::kOptionRex) \
481                                : uint32_t(0);               \
482     }                                                       \
483     else {                                                  \
484       options |= Inst::_kOptionInvalidRex;                  \
485       REG_ID += 4;                                          \
486     }                                                       \
487   } while (0)
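// An illustrative example of the fixup above (not part of the original code):
// for `mov sil, al` the SIL id (6) is >= 4, so kOptionRex is forced and SIL is
// not misinterpreted as DH; for `mov ah, al` AH is a high-byte register, so its
// id is patched from 0 to 4 for ModRM encoding and REX is marked invalid, since
// AH|BH|CH|DH cannot be encoded together with a REX prefix.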
488 
489 #define ENC_OPS1(OP0)                ((Operand::kOp##OP0))
490 #define ENC_OPS2(OP0, OP1)           ((Operand::kOp##OP0) + ((Operand::kOp##OP1) << 3))
491 #define ENC_OPS3(OP0, OP1, OP2)      ((Operand::kOp##OP0) + ((Operand::kOp##OP1) << 3) + ((Operand::kOp##OP2) << 6))
492 #define ENC_OPS4(OP0, OP1, OP2, OP3) ((Operand::kOp##OP0) + ((Operand::kOp##OP1) << 3) + ((Operand::kOp##OP2) << 6) + ((Operand::kOp##OP3) << 9))
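// Illustrative example: ENC_OPS2(Reg, Mem) packs the operand types into 3-bit
// groups, so it compares equal to the `isign3` signature computed in `_emit()`
// for an instruction such as `mov eax, dword ptr [mem]`.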
493 
494 // ============================================================================
495 // [asmjit::x86::Assembler - Movabs Heuristics]
496 // ============================================================================
497 
static ASMJIT_INLINE uint32_t x86GetMovAbsInstSize64Bit(uint32_t regSize, uint32_t options, const Mem& rmRel) noexcept {
499   uint32_t segmentPrefixSize = rmRel.segmentId() != 0;
500   uint32_t _66hPrefixSize = regSize == 2;
501   uint32_t rexPrefixSize = (regSize == 8) || ((options & Inst::kOptionRex) != 0);
502   uint32_t opCodeByteSize = 1;
503   uint32_t immediateSize = 8;
504 
505   return segmentPrefixSize + _66hPrefixSize + rexPrefixSize + opCodeByteSize + immediateSize;
506 }
507 
static ASMJIT_INLINE bool x86ShouldUseMovabs(Assembler* self, X86BufferWriter& writer, uint32_t regSize, uint32_t options, const Mem& rmRel) noexcept {
509   if (self->is32Bit()) {
510     // There is no relative addressing, just decide whether to use MOV encoded with MOD R/M or absolute.
511     return !(options & Inst::kOptionModMR);
512   }
513   else {
514     // If the addressing type is REL or MOD R/M was specified then absolute mov won't be used.
515     if (rmRel.addrType() == Mem::kAddrTypeRel || (options & Inst::kOptionModMR) != 0)
516       return false;
517 
518     int64_t addrValue = rmRel.offset();
519     uint64_t baseAddress = self->code()->baseAddress();
520 
    // If the address type is default, it means to basically check whether relative addressing is possible. However,
    // this is only possible when the base address is known - relative encoding uses RIP+N, which has to be calculated.
523     if (rmRel.addrType() == Mem::kAddrTypeDefault && baseAddress != Globals::kNoBaseAddress && !rmRel.hasSegment()) {
524       uint32_t instructionSize = x86GetMovAbsInstSize64Bit(regSize, options, rmRel);
525       uint64_t virtualOffset = uint64_t(writer.offsetFrom(self->_bufferData));
526       uint64_t rip64 = baseAddress + self->_section->offset() + virtualOffset + instructionSize;
527       uint64_t rel64 = uint64_t(addrValue) - rip64;
528 
529       if (Support::isInt32(int64_t(rel64)))
530         return false;
531     }
532     else {
533       if (Support::isInt32(addrValue))
534         return false;
535     }
536 
537     return uint64_t(addrValue) > 0xFFFFFFFFu;
538   }
539 }
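// An illustrative sketch of the heuristic (not an exhaustive rule): in 64-bit
// mode, moving the accumulator to/from something like `ptr(0x1122334455667788)`
// with no known base address cannot be reached via RIP-relative or sign-extended
// 32-bit addressing, so this helper returns true and the MOVABS form with a
// 64-bit absolute address is selected; an absolute address that fits a
// sign-extended 32-bit displacement keeps the ordinary ModRM encoding.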
540 
541 // ============================================================================
542 // [asmjit::x86::Assembler - Construction / Destruction]
543 // ============================================================================
544 
Assembler::Assembler(CodeHolder* code) noexcept : BaseAssembler() {
546   if (code)
547     code->attach(this);
548 }
Assembler::~Assembler() noexcept {}
550 
551 // ============================================================================
552 // [asmjit::x86::Assembler - Emit (Low-Level)]
553 // ============================================================================
554 
ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) {
556   constexpr uint32_t kVSHR_W     = Opcode::kW_Shift  - 23;
557   constexpr uint32_t kVSHR_PP    = Opcode::kPP_Shift - 16;
558   constexpr uint32_t kVSHR_PP_EW = Opcode::kPP_Shift - 16;
559 
560   constexpr uint32_t kRequiresSpecialHandling =
561     uint32_t(Inst::kOptionReserved) | // Logging/Validation/Error.
562     uint32_t(Inst::kOptionRep     ) | // REP/REPE prefix.
563     uint32_t(Inst::kOptionRepne   ) | // REPNE prefix.
564     uint32_t(Inst::kOptionLock    ) | // LOCK prefix.
565     uint32_t(Inst::kOptionXAcquire) | // XACQUIRE prefix.
566     uint32_t(Inst::kOptionXRelease) ; // XRELEASE prefix.
567 
568   Error err;
569 
570   Opcode opcode;                   // Instruction opcode.
571   uint32_t options;                // Instruction options.
572   uint32_t isign3;                 // A combined signature of first 3 operands.
573 
574   const Operand_* rmRel;           // Memory operand or operand that holds Label|Imm.
575   uint32_t rmInfo;                 // Memory operand's info based on x86MemInfo.
576   uint32_t rbReg;                  // Memory base or modRM register.
577   uint32_t rxReg;                  // Memory index register.
578   uint32_t opReg;                  // ModR/M opcode or register id.
579 
580   LabelEntry* label;               // Label entry.
581   RelocEntry* re = nullptr;        // Relocation entry.
582   int32_t relOffset;               // Relative offset
583   FastUInt8 relSize = 0;           // Relative size.
584   uint8_t* memOpAOMark = nullptr;  // Marker that points before 'address-override prefix' is emitted.
585 
586   int64_t immValue = 0;            // Immediate value (must be 64-bit).
587   FastUInt8 immSize = 0;           // Immediate size.
588 
589   X86BufferWriter writer(this);
590 
591   if (instId >= Inst::_kIdCount)
592     instId = 0;
593 
594   const InstDB::InstInfo* instInfo = &InstDB::_instInfoTable[instId];
595   const InstDB::CommonInfo* commonInfo = &instInfo->commonInfo();
596 
597   // Signature of the first 3 operands.
598   isign3 = o0.opType() + (o1.opType() << 3) + (o2.opType() << 6);
599 
600   // Combine all instruction options and also check whether the instruction
601   // is valid. All options that require special handling (including invalid
602   // instruction) are handled by the next branch.
603   options  = uint32_t(instId == 0);
604   options |= uint32_t((size_t)(_bufferEnd - writer.cursor()) < 16);
605   options |= uint32_t(instOptions() | forcedInstOptions());
606 
607   // Handle failure and rare cases first.
608   if (ASMJIT_UNLIKELY(options & kRequiresSpecialHandling)) {
609     if (ASMJIT_UNLIKELY(!_code))
610       return reportError(DebugUtils::errored(kErrorNotInitialized));
611 
612     // Unknown instruction.
613     if (ASMJIT_UNLIKELY(instId == 0))
614       goto InvalidInstruction;
615 
616     // Grow request, happens rarely.
617     err = writer.ensureSpace(this, 16);
618     if (ASMJIT_UNLIKELY(err))
619       goto Failed;
620 
621 #ifndef ASMJIT_NO_VALIDATION
622     // Strict validation.
623     if (hasValidationOption(kValidationOptionAssembler)) {
624       Operand_ opArray[Globals::kMaxOpCount];
625       EmitterUtils::opArrayFromEmitArgs(opArray, o0, o1, o2, opExt);
626 
627       err = InstAPI::validate(arch(), BaseInst(instId, options, _extraReg), opArray, Globals::kMaxOpCount);
628       if (ASMJIT_UNLIKELY(err))
629         goto Failed;
630     }
631 #endif
632 
633     uint32_t iFlags = instInfo->flags();
634 
635     // LOCK, XACQUIRE, and XRELEASE prefixes.
636     if (options & Inst::kOptionLock) {
637       bool xAcqRel = (options & (Inst::kOptionXAcquire | Inst::kOptionXRelease)) != 0;
638 
639       if (ASMJIT_UNLIKELY(!(iFlags & (InstDB::kFlagLock)) && !xAcqRel))
640         goto InvalidLockPrefix;
641 
642       if (xAcqRel) {
643         if (ASMJIT_UNLIKELY((options & Inst::kOptionXAcquire) && !(iFlags & InstDB::kFlagXAcquire)))
644           goto InvalidXAcquirePrefix;
645 
646         if (ASMJIT_UNLIKELY((options & Inst::kOptionXRelease) && !(iFlags & InstDB::kFlagXRelease)))
647           goto InvalidXReleasePrefix;
648 
649         writer.emit8((options & Inst::kOptionXAcquire) ? 0xF2 : 0xF3);
650       }
651 
652       writer.emit8(0xF0);
653     }
654 
655     // REP and REPNE prefixes.
656     if (options & (Inst::kOptionRep | Inst::kOptionRepne)) {
657       if (ASMJIT_UNLIKELY(!(iFlags & InstDB::kFlagRep)))
658         goto InvalidRepPrefix;
659 
660       if (_extraReg.isReg() && ASMJIT_UNLIKELY(_extraReg.group() != Reg::kGroupGp || _extraReg.id() != Gp::kIdCx))
661         goto InvalidRepPrefix;
662 
663       writer.emit8((options & Inst::kOptionRepne) ? 0xF2 : 0xF3);
664     }
665   }
666 
667   // This sequence seems to be the fastest.
668   opcode = InstDB::_mainOpcodeTable[instInfo->_mainOpcodeIndex];
669   opReg = opcode.extractModO();
670   rbReg = 0;
671   opcode |= instInfo->_mainOpcodeValue;
672 
673   // --------------------------------------------------------------------------
674   // [Encoding Scope]
675   // --------------------------------------------------------------------------
676 
  // How does it work? Each case here represents a unique encoding of a group of
  // instructions, which is handled separately. The handlers check the instruction
  // signature, possibly register types, etc, and process this information by
  // writing some bits to opcode, opReg/rbReg, immValue/immSize, etc, and then,
  // at the end of the sequence, use goto to jump into a lower-level handler
  // that actually encodes the instruction.
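  // As a concrete illustration (a walk-through, not part of the original code):
  // for `add ecx, edx` the kEncodingX86Arith handler stores the ModRM register
  // ids into rbReg/opReg, adjusts the opcode by the operand size, and then jumps
  // to the EmitX86R tail, which is expected to emit the prefixes, the opcode,
  // and the ModRM byte.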
683 
684   switch (instInfo->_encoding) {
685     case InstDB::kEncodingNone:
686       goto EmitDone;
687 
688     // ------------------------------------------------------------------------
689     // [X86]
690     // ------------------------------------------------------------------------
691 
692     case InstDB::kEncodingX86Op:
693       goto EmitX86Op;
694 
695     case InstDB::kEncodingX86Op_Mod11RM:
696       rbReg = opcode.extractModRM();
697       goto EmitX86R;
698 
699     case InstDB::kEncodingX86Op_Mod11RM_I8:
      // The first operand must be immediate; we don't care about the other operands as they could be implicit.
701       if (!o0.isImm())
702         goto InvalidInstruction;
703 
704       rbReg = opcode.extractModRM();
705       immValue = o0.as<Imm>().valueAs<uint8_t>();
706       immSize = 1;
707       goto EmitX86R;
708 
709     case InstDB::kEncodingX86Op_xAddr:
710       if (ASMJIT_UNLIKELY(!o0.isReg()))
711         goto InvalidInstruction;
712 
713       rmInfo = x86MemInfo[o0.as<Reg>().type()];
714       writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0);
715       goto EmitX86Op;
716 
717     case InstDB::kEncodingX86Op_xAX:
718       if (isign3 == 0)
719         goto EmitX86Op;
720 
721       if (isign3 == ENC_OPS1(Reg) && o0.id() == Gp::kIdAx)
722         goto EmitX86Op;
723       break;
724 
725     case InstDB::kEncodingX86Op_xDX_xAX:
726       if (isign3 == 0)
727         goto EmitX86Op;
728 
729       if (isign3 == ENC_OPS2(Reg, Reg) && o0.id() == Gp::kIdDx && o1.id() == Gp::kIdAx)
730         goto EmitX86Op;
731       break;
732 
733     case InstDB::kEncodingX86Op_MemZAX:
734       if (isign3 == 0)
735         goto EmitX86Op;
736 
737       rmRel = &o0;
738       if (isign3 == ENC_OPS1(Mem) && x86IsImplicitMem(o0, Gp::kIdAx))
739         goto EmitX86OpImplicitMem;
740 
741       break;
742 
743     case InstDB::kEncodingX86I_xAX:
744       // Implicit form.
745       if (isign3 == ENC_OPS1(Imm)) {
746         immValue = o0.as<Imm>().valueAs<uint8_t>();
747         immSize = 1;
748         goto EmitX86Op;
749       }
750 
751       // Explicit form.
752       if (isign3 == ENC_OPS2(Reg, Imm) && o0.id() == Gp::kIdAx) {
753         immValue = o1.as<Imm>().valueAs<uint8_t>();
754         immSize = 1;
755         goto EmitX86Op;
756       }
757       break;
758 
759     case InstDB::kEncodingX86M_NoMemSize:
760       if (o0.isReg())
761         opcode.addPrefixBySize(o0.size());
762       goto CaseX86M_NoSize;
763 
764     case InstDB::kEncodingX86M:
765       opcode.addPrefixBySize(o0.size());
766       ASMJIT_FALLTHROUGH;
767 
768     case InstDB::kEncodingX86M_NoSize:
769 CaseX86M_NoSize:
770       rbReg = o0.id();
771       if (isign3 == ENC_OPS1(Reg))
772         goto EmitX86R;
773 
774       rmRel = &o0;
775       if (isign3 == ENC_OPS1(Mem))
776         goto EmitX86M;
777       break;
778 
779     case InstDB::kEncodingX86M_GPB_MulDiv:
780 CaseX86M_GPB_MulDiv:
781       // Explicit form?
782       if (isign3 > 0x7) {
783         // [AX] <- [AX] div|mul r8.
784         if (isign3 == ENC_OPS2(Reg, Reg)) {
785           if (ASMJIT_UNLIKELY(!Reg::isGpw(o0, Gp::kIdAx) || !Reg::isGpb(o1)))
786             goto InvalidInstruction;
787 
788           rbReg = o1.id();
789           FIXUP_GPB(o1, rbReg);
790           goto EmitX86R;
791         }
792 
793         // [AX] <- [AX] div|mul m8.
794         if (isign3 == ENC_OPS2(Reg, Mem)) {
795           if (ASMJIT_UNLIKELY(!Reg::isGpw(o0, Gp::kIdAx)))
796             goto InvalidInstruction;
797 
798           rmRel = &o1;
799           goto EmitX86M;
800         }
801 
802         // [?DX:?AX] <- [?DX:?AX] div|mul r16|r32|r64
803         if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
804           if (ASMJIT_UNLIKELY(o0.size() != o1.size()))
805             goto InvalidInstruction;
806 
807           opcode.addArithBySize(o0.size());
808           rbReg = o2.id();
809           goto EmitX86R;
810         }
811 
812         // [?DX:?AX] <- [?DX:?AX] div|mul m16|m32|m64
813         if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
814           if (ASMJIT_UNLIKELY(o0.size() != o1.size()))
815             goto InvalidInstruction;
816 
817           opcode.addArithBySize(o0.size());
818           rmRel = &o2;
819           goto EmitX86M;
820         }
821 
822         goto InvalidInstruction;
823       }
824 
825       ASMJIT_FALLTHROUGH;
826 
827     case InstDB::kEncodingX86M_GPB:
828       if (isign3 == ENC_OPS1(Reg)) {
829         opcode.addArithBySize(o0.size());
830         rbReg = o0.id();
831 
832         if (o0.size() != 1)
833           goto EmitX86R;
834 
835         FIXUP_GPB(o0, rbReg);
836         goto EmitX86R;
837       }
838 
839       if (isign3 == ENC_OPS1(Mem)) {
840         if (ASMJIT_UNLIKELY(o0.size() == 0))
841           goto AmbiguousOperandSize;
842 
843         opcode.addArithBySize(o0.size());
844         rmRel = &o0;
845         goto EmitX86M;
846       }
847       break;
848 
849     case InstDB::kEncodingX86M_Only_EDX_EAX:
850       if (isign3 == ENC_OPS3(Mem, Reg, Reg) && Reg::isGpd(o1, Gp::kIdDx) && Reg::isGpd(o2, Gp::kIdAx)) {
851         rmRel = &o0;
852         goto EmitX86M;
853       }
854       ASMJIT_FALLTHROUGH;
855 
856     case InstDB::kEncodingX86M_Only:
857       if (isign3 == ENC_OPS1(Mem)) {
858         rmRel = &o0;
859         goto EmitX86M;
860       }
861       break;
862 
863     case InstDB::kEncodingX86M_Nop:
864       if (isign3 == ENC_OPS1(None))
865         goto EmitX86Op;
866 
867       // Single operand NOP instruction "0F 1F /0".
868       opcode = Opcode::k000F00 | 0x1F;
869       opReg = 0;
870 
871       if (isign3 == ENC_OPS1(Reg)) {
872         opcode.addPrefixBySize(o0.size());
873         rbReg = o0.id();
874         goto EmitX86R;
875       }
876 
877       if (isign3 == ENC_OPS1(Mem)) {
878         opcode.addPrefixBySize(o0.size());
879         rmRel = &o0;
880         goto EmitX86M;
881       }
882 
883       // Two operand NOP instruction "0F 1F /r".
884       opReg = o1.id();
885       opcode.addPrefixBySize(o1.size());
886 
887       if (isign3 == ENC_OPS2(Reg, Reg)) {
888         rbReg = o0.id();
889         goto EmitX86R;
890       }
891 
892       if (isign3 == ENC_OPS2(Mem, Reg)) {
893         rmRel = &o0;
894         goto EmitX86M;
895       }
896       break;
897 
898     case InstDB::kEncodingX86R_FromM:
899       if (isign3 == ENC_OPS1(Mem)) {
900         rmRel = &o0;
901         rbReg = o0.id();
902         goto EmitX86RFromM;
903       }
904       break;
905 
906     case InstDB::kEncodingX86R32_EDX_EAX:
907       // Explicit form: R32, EDX, EAX.
908       if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
909         if (!Reg::isGpd(o1, Gp::kIdDx) || !Reg::isGpd(o2, Gp::kIdAx))
910           goto InvalidInstruction;
911         rbReg = o0.id();
912         goto EmitX86R;
913       }
914 
915       // Implicit form: R32.
916       if (isign3 == ENC_OPS1(Reg)) {
917         if (!Reg::isGpd(o0))
918           goto InvalidInstruction;
919         rbReg = o0.id();
920         goto EmitX86R;
921       }
922       break;
923 
924     case InstDB::kEncodingX86R_Native:
925       if (isign3 == ENC_OPS1(Reg)) {
926         rbReg = o0.id();
927         goto EmitX86R;
928       }
929       break;
930 
931     case InstDB::kEncodingX86Rm:
932       opcode.addPrefixBySize(o0.size());
933       ASMJIT_FALLTHROUGH;
934 
935     case InstDB::kEncodingX86Rm_NoSize:
936       if (isign3 == ENC_OPS2(Reg, Reg)) {
937         opReg = o0.id();
938         rbReg = o1.id();
939         goto EmitX86R;
940       }
941 
942       if (isign3 == ENC_OPS2(Reg, Mem)) {
943         opReg = o0.id();
944         rmRel = &o1;
945         goto EmitX86M;
946       }
947       break;
948 
949     case InstDB::kEncodingX86Rm_Raw66H:
      // We normally emit either [66|F2|F3]; this instruction requires 66+[F2|F3].
951       if (isign3 == ENC_OPS2(Reg, Reg)) {
952         opReg = o0.id();
953         rbReg = o1.id();
954 
955         if (o0.size() == 2)
956           writer.emit8(0x66);
957         else
958           opcode.addWBySize(o0.size());
959         goto EmitX86R;
960       }
961 
962       if (isign3 == ENC_OPS2(Reg, Mem)) {
963         opReg = o0.id();
964         rmRel = &o1;
965 
966         if (o0.size() == 2)
967           writer.emit8(0x66);
968         else
969           opcode.addWBySize(o0.size());
970         goto EmitX86M;
971       }
972       break;
973 
974     case InstDB::kEncodingX86Mr:
975       opcode.addPrefixBySize(o0.size());
976       ASMJIT_FALLTHROUGH;
977 
978     case InstDB::kEncodingX86Mr_NoSize:
979       if (isign3 == ENC_OPS2(Reg, Reg)) {
980         rbReg = o0.id();
981         opReg = o1.id();
982         goto EmitX86R;
983       }
984 
985       if (isign3 == ENC_OPS2(Mem, Reg)) {
986         rmRel = &o0;
987         opReg = o1.id();
988         goto EmitX86M;
989       }
990       break;
991 
992     case InstDB::kEncodingX86Arith:
993       if (isign3 == ENC_OPS2(Reg, Reg)) {
994         opcode.addArithBySize(o0.size());
995 
996         if (o0.size() != o1.size())
997           goto OperandSizeMismatch;
998 
999         rbReg = o0.id();
1000         opReg = o1.id();
1001 
1002         if (o0.size() == 1) {
1003           FIXUP_GPB(o0, rbReg);
1004           FIXUP_GPB(o1, opReg);
1005         }
1006 
        // MOD/MR: The default encoding used if not instructed otherwise.
1008         if (!(options & Inst::kOptionModRM))
1009           goto EmitX86R;
1010 
1011         // MOD/RM: Alternative encoding selected via instruction options.
1012         opcode += 2;
1013         std::swap(opReg, rbReg);
1014         goto EmitX86R;
1015       }
1016 
1017       if (isign3 == ENC_OPS2(Reg, Mem)) {
1018         opcode += 2;
1019         opcode.addArithBySize(o0.size());
1020 
1021         opReg = o0.id();
1022         rmRel = &o1;
1023 
1024         if (o0.size() != 1)
1025           goto EmitX86M;
1026 
1027         FIXUP_GPB(o0, opReg);
1028         goto EmitX86M;
1029       }
1030 
1031       if (isign3 == ENC_OPS2(Mem, Reg)) {
1032         opcode.addArithBySize(o1.size());
1033         opReg = o1.id();
1034         rmRel = &o0;
1035 
1036         if (o1.size() != 1)
1037           goto EmitX86M;
1038 
1039         FIXUP_GPB(o1, opReg);
1040         goto EmitX86M;
1041       }
1042 
1043       // The remaining instructions use 0x80 opcode.
1044       opcode = 0x80;
1045 
1046       if (isign3 == ENC_OPS2(Reg, Imm)) {
1047         uint32_t size = o0.size();
1048 
1049         rbReg = o0.id();
1050         immValue = o1.as<Imm>().value();
1051 
1052         if (size == 1) {
1053           FIXUP_GPB(o0, rbReg);
1054           immSize = 1;
1055         }
1056         else {
1057           if (size == 2) {
1058             opcode |= Opcode::kPP_66;
1059           }
1060           else if (size == 4) {
1061             // Sign extend so isInt8 returns the right result.
1062             immValue = x86SignExtendI32<int64_t>(immValue);
1063           }
1064           else if (size == 8) {
1065             bool canTransformTo32Bit = instId == Inst::kIdAnd && Support::isUInt32(immValue);
1066 
1067             if (!Support::isInt32(immValue)) {
              // We would do this by default when `kEncodingOptionOptimizeForSize`
              // is enabled; however, in this case we force it, as otherwise we
              // would have to fail.
1071               if (canTransformTo32Bit)
1072                 size = 4;
1073               else
1074                 goto InvalidImmediate;
1075             }
1076             else if (canTransformTo32Bit && hasEncodingOption(kEncodingOptionOptimizeForSize)) {
1077               size = 4;
1078             }
1079 
1080             opcode.addWBySize(size);
1081           }
1082 
1083           immSize = FastUInt8(Support::min<uint32_t>(size, 4));
1084           if (Support::isInt8(immValue) && !(options & Inst::kOptionLongForm))
1085             immSize = 1;
1086         }
1087 
1088         // Short form - AL, AX, EAX, RAX.
1089         if (rbReg == 0 && (size == 1 || immSize != 1) && !(options & Inst::kOptionLongForm)) {
1090           opcode &= Opcode::kPP_66 | Opcode::kW;
1091           opcode |= ((opReg << 3) | (0x04 + (size != 1)));
1092           immSize = FastUInt8(Support::min<uint32_t>(size, 4));
1093           goto EmitX86Op;
1094         }
1095 
1096         opcode += size != 1 ? (immSize != 1 ? 1 : 3) : 0;
1097         goto EmitX86R;
1098       }
1099 
1100       if (isign3 == ENC_OPS2(Mem, Imm)) {
1101         uint32_t memSize = o0.size();
1102 
1103         if (ASMJIT_UNLIKELY(memSize == 0))
1104           goto AmbiguousOperandSize;
1105 
1106         immValue = o1.as<Imm>().value();
1107         immSize = FastUInt8(Support::min<uint32_t>(memSize, 4));
1108 
1109         // Sign extend so isInt8 returns the right result.
1110         if (memSize == 4)
1111           immValue = x86SignExtendI32<int64_t>(immValue);
1112 
1113         if (Support::isInt8(immValue) && !(options & Inst::kOptionLongForm))
1114           immSize = 1;
1115 
1116         opcode += memSize != 1 ? (immSize != 1 ? 1 : 3) : 0;
1117         opcode.addPrefixBySize(memSize);
1118 
1119         rmRel = &o0;
1120         goto EmitX86M;
1121       }
1122       break;
1123 
1124     case InstDB::kEncodingX86Bswap:
1125       if (isign3 == ENC_OPS1(Reg)) {
1126         if (ASMJIT_UNLIKELY(o0.size() == 1))
1127           goto InvalidInstruction;
1128 
1129         opReg = o0.id();
1130         opcode.addPrefixBySize(o0.size());
1131         goto EmitX86OpReg;
1132       }
1133       break;
1134 
1135     case InstDB::kEncodingX86Bt:
1136       if (isign3 == ENC_OPS2(Reg, Reg)) {
1137         opcode.addPrefixBySize(o1.size());
1138         opReg = o1.id();
1139         rbReg = o0.id();
1140         goto EmitX86R;
1141       }
1142 
1143       if (isign3 == ENC_OPS2(Mem, Reg)) {
1144         opcode.addPrefixBySize(o1.size());
1145         opReg = o1.id();
1146         rmRel = &o0;
1147         goto EmitX86M;
1148       }
1149 
1150       // The remaining instructions use the secondary opcode/r.
1151       immValue = o1.as<Imm>().value();
1152       immSize = 1;
1153 
1154       opcode = x86AltOpcodeOf(instInfo);
1155       opcode.addPrefixBySize(o0.size());
1156       opReg = opcode.extractModO();
1157 
1158       if (isign3 == ENC_OPS2(Reg, Imm)) {
1159         rbReg = o0.id();
1160         goto EmitX86R;
1161       }
1162 
1163       if (isign3 == ENC_OPS2(Mem, Imm)) {
1164         if (ASMJIT_UNLIKELY(o0.size() == 0))
1165           goto AmbiguousOperandSize;
1166 
1167         rmRel = &o0;
1168         goto EmitX86M;
1169       }
1170       break;
1171 
1172     case InstDB::kEncodingX86Call:
1173       if (isign3 == ENC_OPS1(Reg)) {
1174         rbReg = o0.id();
1175         goto EmitX86R;
1176       }
1177 
1178       rmRel = &o0;
1179       if (isign3 == ENC_OPS1(Mem))
1180         goto EmitX86M;
1181 
      // A call with a 32-bit displacement uses the 0xE8 opcode. A call with an
      // 8-bit displacement is not encodable, so the alternative opcode field
      // in X86DB must be zero.
1185       opcode = 0xE8;
1186       opReg = 0;
1187       goto EmitJmpCall;
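      // (Illustrative note: a direct `call` to a label therefore becomes 0xE8
      // followed by a 32-bit relative displacement computed by the EmitJmpCall
      // tail.)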
1188 
1189     case InstDB::kEncodingX86Cmpxchg: {
1190       // Convert explicit to implicit.
1191       if (isign3 & (0x7 << 6)) {
1192         if (!Reg::isGp(o2) || o2.id() != Gp::kIdAx)
1193           goto InvalidInstruction;
1194         isign3 &= 0x3F;
1195       }
1196 
1197       if (isign3 == ENC_OPS2(Reg, Reg)) {
1198         if (o0.size() != o1.size())
1199           goto OperandSizeMismatch;
1200 
1201         opcode.addArithBySize(o0.size());
1202         rbReg = o0.id();
1203         opReg = o1.id();
1204 
1205         if (o0.size() != 1)
1206           goto EmitX86R;
1207 
1208         FIXUP_GPB(o0, rbReg);
1209         FIXUP_GPB(o1, opReg);
1210         goto EmitX86R;
1211       }
1212 
1213       if (isign3 == ENC_OPS2(Mem, Reg)) {
1214         opcode.addArithBySize(o1.size());
1215         opReg = o1.id();
1216         rmRel = &o0;
1217 
1218         if (o1.size() != 1)
1219           goto EmitX86M;
1220 
1221         FIXUP_GPB(o1, opReg);
1222         goto EmitX86M;
1223       }
1224       break;
1225     }
1226 
1227     case InstDB::kEncodingX86Cmpxchg8b_16b: {
1228       const Operand_& o3 = opExt[EmitterUtils::kOp3];
1229       const Operand_& o4 = opExt[EmitterUtils::kOp4];
1230 
1231       if (isign3 == ENC_OPS3(Mem, Reg, Reg)) {
1232         if (o3.isReg() && o4.isReg()) {
1233           rmRel = &o0;
1234           goto EmitX86M;
1235         }
1236       }
1237 
1238       if (isign3 == ENC_OPS1(Mem)) {
1239         rmRel = &o0;
1240         goto EmitX86M;
1241       }
1242       break;
1243     }
1244 
1245     case InstDB::kEncodingX86Crc:
1246       opReg = o0.id();
1247       opcode.addWBySize(o0.size());
1248 
1249       if (isign3 == ENC_OPS2(Reg, Reg)) {
1250         rbReg = o1.id();
1251 
1252         if (o1.size() == 1) {
1253           FIXUP_GPB(o1, rbReg);
1254           goto EmitX86R;
1255         }
1256         else {
          // This seems to be the only exception where the '66F2' prefix is encoded.
1258           if (o1.size() == 2) writer.emit8(0x66);
1259 
1260           opcode.add(1);
1261           goto EmitX86R;
1262         }
1263       }
1264 
1265       if (isign3 == ENC_OPS2(Reg, Mem)) {
1266         rmRel = &o1;
1267         if (o1.size() == 0)
1268           goto AmbiguousOperandSize;
1269 
        // This seems to be the only exception where the '66F2' prefix is encoded.
1271         if (o1.size() == 2) writer.emit8(0x66);
1272 
1273         opcode += o1.size() != 1;
1274         goto EmitX86M;
1275       }
1276       break;
1277 
1278     case InstDB::kEncodingX86Enter:
1279       if (isign3 == ENC_OPS2(Imm, Imm)) {
1280         uint32_t iw = o0.as<Imm>().valueAs<uint16_t>();
1281         uint32_t ib = o1.as<Imm>().valueAs<uint8_t>();
1282 
1283         immValue = iw | (ib << 16);
1284         immSize = 3;
1285         goto EmitX86Op;
1286       }
1287       break;
1288 
1289     case InstDB::kEncodingX86Imul:
      // First process all forms distinct from `kEncodingX86M_GPB_MulDiv`.
1291       if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
1292         opcode = 0x6B;
1293         opcode.addPrefixBySize(o0.size());
1294 
1295         immValue = o2.as<Imm>().value();
1296         immSize = 1;
1297 
1298         if (!Support::isInt8(immValue) || (options & Inst::kOptionLongForm)) {
1299           opcode -= 2;
1300           immSize = o0.size() == 2 ? 2 : 4;
1301         }
1302 
1303         opReg = o0.id();
1304         rbReg = o1.id();
1305 
1306         goto EmitX86R;
1307       }
1308 
1309       if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
1310         opcode = 0x6B;
1311         opcode.addPrefixBySize(o0.size());
1312 
1313         immValue = o2.as<Imm>().value();
1314         immSize = 1;
1315 
1316         // Sign extend so isInt8 returns the right result.
1317         if (o0.size() == 4)
1318           immValue = x86SignExtendI32<int64_t>(immValue);
1319 
1320         if (!Support::isInt8(immValue) || (options & Inst::kOptionLongForm)) {
1321           opcode -= 2;
1322           immSize = o0.size() == 2 ? 2 : 4;
1323         }
1324 
1325         opReg = o0.id();
1326         rmRel = &o1;
1327 
1328         goto EmitX86M;
1329       }
1330 
1331       if (isign3 == ENC_OPS2(Reg, Reg)) {
1332         // Must be explicit 'ax, r8' form.
1333         if (o1.size() == 1)
1334           goto CaseX86M_GPB_MulDiv;
1335 
1336         if (o0.size() != o1.size())
1337           goto OperandSizeMismatch;
1338 
1339         opReg = o0.id();
1340         rbReg = o1.id();
1341 
1342         opcode = Opcode::k000F00 | 0xAF;
1343         opcode.addPrefixBySize(o0.size());
1344         goto EmitX86R;
1345       }
1346 
1347       if (isign3 == ENC_OPS2(Reg, Mem)) {
1348         // Must be explicit 'ax, m8' form.
1349         if (o1.size() == 1)
1350           goto CaseX86M_GPB_MulDiv;
1351 
1352         opReg = o0.id();
1353         rmRel = &o1;
1354 
1355         opcode = Opcode::k000F00 | 0xAF;
1356         opcode.addPrefixBySize(o0.size());
1357         goto EmitX86M;
1358       }
1359 
      // Shorthand for imul 'reg, reg, imm'.
1361       if (isign3 == ENC_OPS2(Reg, Imm)) {
1362         opcode = 0x6B;
1363         opcode.addPrefixBySize(o0.size());
1364 
1365         immValue = o1.as<Imm>().value();
1366         immSize = 1;
1367 
1368         // Sign extend so isInt8 returns the right result.
1369         if (o0.size() == 4)
1370           immValue = x86SignExtendI32<int64_t>(immValue);
1371 
1372         if (!Support::isInt8(immValue) || (options & Inst::kOptionLongForm)) {
1373           opcode -= 2;
1374           immSize = o0.size() == 2 ? 2 : 4;
1375         }
1376 
1377         opReg = rbReg = o0.id();
1378         goto EmitX86R;
1379       }
1380 
1381       // Try implicit form.
1382       goto CaseX86M_GPB_MulDiv;
1383 
1384     case InstDB::kEncodingX86In:
1385       if (isign3 == ENC_OPS2(Reg, Imm)) {
1386         if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdAx))
1387           goto InvalidInstruction;
1388 
1389         immValue = o1.as<Imm>().valueAs<uint8_t>();
1390         immSize = 1;
1391 
1392         opcode = x86AltOpcodeOf(instInfo) + (o0.size() != 1);
1393         opcode.add66hBySize(o0.size());
1394         goto EmitX86Op;
1395       }
1396 
1397       if (isign3 == ENC_OPS2(Reg, Reg)) {
1398         if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdAx || o1.id() != Gp::kIdDx))
1399           goto InvalidInstruction;
1400 
1401         opcode += o0.size() != 1;
1402         opcode.add66hBySize(o0.size());
1403         goto EmitX86Op;
1404       }
1405       break;
1406 
1407     case InstDB::kEncodingX86Ins:
1408       if (isign3 == ENC_OPS2(Mem, Reg)) {
1409         if (ASMJIT_UNLIKELY(!x86IsImplicitMem(o0, Gp::kIdDi) || o1.id() != Gp::kIdDx))
1410           goto InvalidInstruction;
1411 
1412         uint32_t size = o0.size();
1413         if (ASMJIT_UNLIKELY(size == 0))
1414           goto AmbiguousOperandSize;
1415 
1416         rmRel = &o0;
1417         opcode += (size != 1);
1418 
1419         opcode.add66hBySize(size);
1420         goto EmitX86OpImplicitMem;
1421       }
1422       break;
1423 
1424     case InstDB::kEncodingX86IncDec:
1425       if (isign3 == ENC_OPS1(Reg)) {
1426         rbReg = o0.id();
1427 
1428         if (o0.size() == 1) {
1429           FIXUP_GPB(o0, rbReg);
1430           goto EmitX86R;
1431         }
1432 
1433         if (is32Bit()) {
1434           // INC r16|r32 is only encodable in 32-bit mode (collides with REX).
1435           opcode = x86AltOpcodeOf(instInfo) + (rbReg & 0x07);
1436           opcode.add66hBySize(o0.size());
1437           goto EmitX86Op;
1438         }
1439         else {
1440           opcode.addArithBySize(o0.size());
1441           goto EmitX86R;
1442         }
1443       }
1444 
1445       if (isign3 == ENC_OPS1(Mem)) {
1446         if (!o0.size())
1447           goto AmbiguousOperandSize;
1448         opcode.addArithBySize(o0.size());
1449         rmRel = &o0;
1450         goto EmitX86M;
1451       }
1452       break;
1453 
1454     case InstDB::kEncodingX86Int:
1455       if (isign3 == ENC_OPS1(Imm)) {
1456         immValue = o0.as<Imm>().value();
1457         immSize = 1;
1458         goto EmitX86Op;
1459       }
1460       break;
1461 
1462     case InstDB::kEncodingX86Jcc:
1463       if ((options & (Inst::kOptionTaken | Inst::kOptionNotTaken)) && hasEncodingOption(kEncodingOptionPredictedJumps)) {
1464         uint8_t prefix = (options & Inst::kOptionTaken) ? uint8_t(0x3E) : uint8_t(0x2E);
1465         writer.emit8(prefix);
1466       }
1467 
1468       rmRel = &o0;
1469       opReg = 0;
1470       goto EmitJmpCall;
1471 
1472     case InstDB::kEncodingX86JecxzLoop:
1473       rmRel = &o0;
1474       // Explicit jecxz|loop [r|e]cx, dst
1475       if (o0.isReg()) {
1476         if (ASMJIT_UNLIKELY(!Reg::isGp(o0, Gp::kIdCx)))
1477           goto InvalidInstruction;
1478 
1479         writer.emitAddressOverride((is32Bit() && o0.size() == 2) || (is64Bit() && o0.size() == 4));
1480         rmRel = &o1;
1481       }
1482 
1483       opReg = 0;
1484       goto EmitJmpCall;
1485 
1486     case InstDB::kEncodingX86Jmp:
1487       if (isign3 == ENC_OPS1(Reg)) {
1488         rbReg = o0.id();
1489         goto EmitX86R;
1490       }
1491 
1492       rmRel = &o0;
1493       if (isign3 == ENC_OPS1(Mem))
1494         goto EmitX86M;
1495 
      // A jump encoded with a 32-bit displacement uses the 0xE9 opcode. The
      // opcode of a jump encoded with an 8-bit displacement is stored as an
      // alternative opcode.
1498       opcode = 0xE9;
1499       opReg = 0;
1500       goto EmitJmpCall;
1501 
1502     case InstDB::kEncodingX86JmpRel:
1503       rmRel = &o0;
1504       goto EmitJmpCall;
1505 
1506     case InstDB::kEncodingX86LcallLjmp:
1507       if (isign3 == ENC_OPS1(Mem)) {
1508         rmRel = &o0;
1509         uint32_t mSize = rmRel->size();
1510         if (mSize == 0) {
1511           mSize = registerSize();
1512         }
1513         else {
1514           mSize -= 2;
1515           if (mSize != 2 && mSize != 4 && mSize != registerSize())
1516             goto InvalidAddress;
1517         }
1518         opcode.addPrefixBySize(mSize);
1519         goto EmitX86M;
1520       }
1521 
1522       if (isign3 == ENC_OPS2(Imm, Imm)) {
1523         if (!is32Bit())
1524           goto InvalidInstruction;
1525 
1526         const Imm& imm0 = o0.as<Imm>();
1527         const Imm& imm1 = o1.as<Imm>();
1528 
1529         if (imm0.value() > 0xFFFFu || imm1.value() > 0xFFFFFFFFu)
1530           goto InvalidImmediate;
1531 
1532         opcode = x86AltOpcodeOf(instInfo);
1533         immValue = imm1.value() | (imm0.value() << 32);
1534         immSize = 6;
1535         goto EmitX86Op;
1536       }
1537       break;
1538 
1539     case InstDB::kEncodingX86Lea:
1540       if (isign3 == ENC_OPS2(Reg, Mem)) {
1541         opcode.addPrefixBySize(o0.size());
1542         opReg = o0.id();
1543         rmRel = &o1;
1544         goto EmitX86M;
1545       }
1546       break;
1547 
1548     case InstDB::kEncodingX86Mov:
1549       // Reg <- Reg
1550       if (isign3 == ENC_OPS2(Reg, Reg)) {
        // AsmJit uses segment registers indexed from 1 to 6, leaving zero as
1552         // "no segment register used". We have to fix this (decrement the index
1553         // of the register) when emitting MOV instructions which move to/from
1554         // a segment register. The segment register is always `opReg`, because
1555         // the MOV instruction uses either RM or MR encoding.
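        // Illustrative example: for `mov ax, ds`, DS has AsmJit id 4 (see
        // x86SegmentPrefix), so the `opReg--` below turns it into 3, the
        // hardware 'sreg' value that opcode 8C expects in the ModRM reg field.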
1556 
1557         // GP <- ??
1558         if (Reg::isGp(o0)) {
1559           rbReg = o0.id();
1560           opReg = o1.id();
1561 
1562           // GP <- GP
1563           if (Reg::isGp(o1)) {
1564             uint32_t opSize = o0.size();
1565             if (opSize != o1.size()) {
1566               // TODO: [X86 Assembler] This is a non-standard extension, which should be removed.
1567               // We allow 'mov r64, r32' as it's basically zero-extend.
1568               if (opSize == 8 && o1.size() == 4)
1569                 opSize = 4; // Zero extend, don't promote to 64-bit.
1570               else
1571                 goto InvalidInstruction;
1572             }
1573 
1574             if (opSize == 1) {
1575               FIXUP_GPB(o0, rbReg);
1576               FIXUP_GPB(o1, opReg);
1577               opcode = 0x88;
1578 
1579               if (!(options & Inst::kOptionModRM))
1580                 goto EmitX86R;
1581 
1582               opcode += 2;
1583               std::swap(opReg, rbReg);
1584               goto EmitX86R;
1585             }
1586             else {
1587               opcode = 0x89;
1588               opcode.addPrefixBySize(opSize);
1589 
1590               if (!(options & Inst::kOptionModRM))
1591                 goto EmitX86R;
1592 
1593               opcode += 2;
1594               std::swap(opReg, rbReg);
1595               goto EmitX86R;
1596             }
1597           }
1598 
1599           // GP <- SReg
1600           if (Reg::isSReg(o1)) {
1601             opcode = 0x8C;
1602             opcode.addPrefixBySize(o0.size());
1603             opReg--;
1604             goto EmitX86R;
1605           }
1606 
1607           // GP <- CReg
1608           if (Reg::isCReg(o1)) {
1609             opcode = Opcode::k000F00 | 0x20;
1610 
1611             // Use `LOCK MOV` in 32-bit mode if CR8+ register is accessed (AMD extension).
1612             if ((opReg & 0x8) && is32Bit()) {
1613               writer.emit8(0xF0);
1614               opReg &= 0x7;
1615             }
1616             goto EmitX86R;
1617           }
1618 
1619           // GP <- DReg
1620           if (Reg::isDReg(o1)) {
1621             opcode = Opcode::k000F00 | 0x21;
1622             goto EmitX86R;
1623           }
1624         }
1625         else {
1626           opReg = o0.id();
1627           rbReg = o1.id();
1628 
1629           // ?? <- GP
1630           if (!Reg::isGp(o1))
1631             goto InvalidInstruction;
1632 
1633           // SReg <- GP
1634           if (Reg::isSReg(o0)) {
1635             opcode = 0x8E;
1636             opcode.addPrefixBySize(o1.size());
1637             opReg--;
1638             goto EmitX86R;
1639           }
1640 
1641           // CReg <- GP
1642           if (Reg::isCReg(o0)) {
1643             opcode = Opcode::k000F00 | 0x22;
1644 
1645             // Use `LOCK MOV` in 32-bit mode if CR8+ register is accessed (AMD extension).
1646             if ((opReg & 0x8) && is32Bit()) {
1647               writer.emit8(0xF0);
1648               opReg &= 0x7;
1649             }
1650             goto EmitX86R;
1651           }
1652 
1653           // DReg <- GP
1654           if (Reg::isDReg(o0)) {
1655             opcode = Opcode::k000F00 | 0x23;
1656             goto EmitX86R;
1657           }
1658         }
1659 
1660         goto InvalidInstruction;
1661       }
1662 
1663       if (isign3 == ENC_OPS2(Reg, Mem)) {
1664         opReg = o0.id();
1665         rmRel = &o1;
1666 
1667         // SReg <- Mem
1668         if (Reg::isSReg(o0)) {
1669           opcode = 0x8E;
1670           opcode.addPrefixBySize(o1.size());
1671           opReg--;
1672           goto EmitX86M;
1673         }
1674         // Reg <- Mem
1675         else {
1676           opcode = 0;
1677           opcode.addArithBySize(o0.size());
1678 
          // Handle a special form of `mov al|ax|eax|rax, [ptr64]` that doesn't
          // use a ModR/M byte (the 'moffs' form).
1680           if (opReg == Gp::kIdAx && !rmRel->as<Mem>().hasBaseOrIndex()) {
1681             if (x86ShouldUseMovabs(this, writer, o0.size(), options, rmRel->as<Mem>())) {
1682               opcode += 0xA0;
1683               immValue = rmRel->as<Mem>().offset();
1684               goto EmitX86OpMovAbs;
1685             }
1686           }
1687 
1688           if (o0.size() == 1)
1689             FIXUP_GPB(o0, opReg);
1690 
1691           opcode += 0x8A;
1692           goto EmitX86M;
1693         }
1694       }
1695 
1696       if (isign3 == ENC_OPS2(Mem, Reg)) {
1697         opReg = o1.id();
1698         rmRel = &o0;
1699 
1700         // Mem <- SReg
1701         if (Reg::isSReg(o1)) {
1702           opcode = 0x8C;
1703           opcode.addPrefixBySize(o0.size());
1704           opReg--;
1705           goto EmitX86M;
1706         }
1707         // Mem <- Reg
1708         else {
1709           opcode = 0;
1710           opcode.addArithBySize(o1.size());
1711 
          // Handle a special form of `mov [ptr64], al|ax|eax|rax` that doesn't
          // use a ModR/M byte (the 'moffs' form).
1713           if (opReg == Gp::kIdAx && !rmRel->as<Mem>().hasBaseOrIndex()) {
1714             if (x86ShouldUseMovabs(this, writer, o1.size(), options, rmRel->as<Mem>())) {
1715               opcode += 0xA2;
1716               immValue = rmRel->as<Mem>().offset();
1717               goto EmitX86OpMovAbs;
1718             }
1719           }
1720 
1721           if (o1.size() == 1)
1722             FIXUP_GPB(o1, opReg);
1723 
1724           opcode += 0x88;
1725           goto EmitX86M;
1726         }
1727       }
1728 
1729       if (isign3 == ENC_OPS2(Reg, Imm)) {
1730         opReg = o0.id();
1731         immSize = FastUInt8(o0.size());
1732 
1733         if (immSize == 1) {
1734           FIXUP_GPB(o0, opReg);
1735 
1736           opcode = 0xB0;
1737           immValue = o1.as<Imm>().valueAs<uint8_t>();
1738           goto EmitX86OpReg;
1739         }
1740         else {
1741           // 64-bit immediate in 64-bit mode is allowed.
1742           immValue = o1.as<Imm>().value();
1743 
1744           // Optimize the instruction size by using a 32-bit immediate if possible.
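          // For example, `mov rax, -1` can be emitted as the 7-byte
          // 'REX.W C7 /0 id' (sign-extended imm32) form instead of the 10-byte
          // 'REX.W B8+r imm64' form, and with size optimization enabled
          // `mov rax, 0xFFFFFFFF` can drop REX.W entirely because writing a
          // 32-bit destination implicitly zero-extends to 64 bits.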
1745           if (immSize == 8 && !(options & Inst::kOptionLongForm)) {
1746             if (Support::isUInt32(immValue) && hasEncodingOption(kEncodingOptionOptimizeForSize)) {
1747               // Zero-extend by using a 32-bit GPD destination instead of a 64-bit GPQ.
1748               immSize = 4;
1749             }
1750             else if (Support::isInt32(immValue)) {
1751               // Sign-extend, uses 'C7 /0' opcode.
1752               rbReg = opReg;
1753 
1754               opcode = Opcode::kW | 0xC7;
1755               opReg = 0;
1756 
1757               immSize = 4;
1758               goto EmitX86R;
1759             }
1760           }
1761 
1762           opcode = 0xB8;
1763           opcode.addPrefixBySize(immSize);
1764           goto EmitX86OpReg;
1765         }
1766       }
1767 
1768       if (isign3 == ENC_OPS2(Mem, Imm)) {
1769         uint32_t memSize = o0.size();
1770         if (ASMJIT_UNLIKELY(memSize == 0))
1771           goto AmbiguousOperandSize;
1772 
1773         opcode = 0xC6 + (memSize != 1);
1774         opcode.addPrefixBySize(memSize);
1775         opReg = 0;
1776         rmRel = &o0;
1777 
1778         immValue = o1.as<Imm>().value();
1779         immSize = FastUInt8(Support::min<uint32_t>(memSize, 4));
1780         goto EmitX86M;
1781       }
1782       break;
1783 
1784     case InstDB::kEncodingX86Movabs:
1785       // Reg <- Mem
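      // MOVABS here refers to the 0xA0..0xA3 'moffs' forms, which encode the
      // full absolute address directly after the opcode with no ModR/M or SIB
      // byte; only the accumulator (AL|AX|EAX|RAX) can be the register operand.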
1786       if (isign3 == ENC_OPS2(Reg, Mem)) {
1787         opReg = o0.id();
1788         rmRel = &o1;
1789 
1790         opcode = 0xA0;
1791         opcode.addArithBySize(o0.size());
1792 
1793         if (ASMJIT_UNLIKELY(!o0.as<Reg>().isGp()) || opReg != Gp::kIdAx)
1794           goto InvalidInstruction;
1795 
1796         if (ASMJIT_UNLIKELY(rmRel->as<Mem>().hasBaseOrIndex()))
1797           goto InvalidAddress;
1798 
1799         if (ASMJIT_UNLIKELY(rmRel->as<Mem>().addrType() == Mem::kAddrTypeRel))
1800           goto InvalidAddress;
1801 
1802         immValue = rmRel->as<Mem>().offset();
1803         goto EmitX86OpMovAbs;
1804       }
1805 
1806       // Mem <- Reg
1807       if (isign3 == ENC_OPS2(Mem, Reg)) {
1808         opReg = o1.id();
1809         rmRel = &o0;
1810 
1811         opcode = 0xA2;
1812         opcode.addArithBySize(o1.size());
1813 
1814         if (ASMJIT_UNLIKELY(!o1.as<Reg>().isGp()) || opReg != Gp::kIdAx)
1815           goto InvalidInstruction;
1816 
1817         if (ASMJIT_UNLIKELY(rmRel->as<Mem>().hasBaseOrIndex()))
1818           goto InvalidAddress;
1819 
1820         immValue = rmRel->as<Mem>().offset();
1821         goto EmitX86OpMovAbs;
1822       }
1823 
1824       // Reg <- Imm.
1825       if (isign3 == ENC_OPS2(Reg, Imm)) {
1826         if (ASMJIT_UNLIKELY(!o0.as<Reg>().isGpq()))
1827           goto InvalidInstruction;
1828 
1829         opReg = o0.id();
1830         opcode = 0xB8;
1831 
1832         immSize = 8;
1833         immValue = o1.as<Imm>().value();
1834 
1835         opcode.addPrefixBySize(8);
1836         goto EmitX86OpReg;
1837       }
1838       break;
1839 
1840     case InstDB::kEncodingX86MovsxMovzx:
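      // The base opcode is the byte-source form (e.g. 0F B6 for 'movzx r, r/m8');
      // adding one selects the word-source form (0F B7), while the destination
      // width is handled by the operand-size / REX.W prefix added below.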
1841       opcode.add(o1.size() != 1);
1842       opcode.addPrefixBySize(o0.size());
1843 
1844       if (isign3 == ENC_OPS2(Reg, Reg)) {
1845         opReg = o0.id();
1846         rbReg = o1.id();
1847 
1848         if (o1.size() != 1)
1849           goto EmitX86R;
1850 
1851         FIXUP_GPB(o1, rbReg);
1852         goto EmitX86R;
1853       }
1854 
1855       if (isign3 == ENC_OPS2(Reg, Mem)) {
1856         opReg = o0.id();
1857         rmRel = &o1;
1858         goto EmitX86M;
1859       }
1860       break;
1861 
1862     case InstDB::kEncodingX86MovntiMovdiri:
1863       if (isign3 == ENC_OPS2(Mem, Reg)) {
1864         opcode.addWIf(Reg::isGpq(o1));
1865 
1866         opReg = o1.id();
1867         rmRel = &o0;
1868         goto EmitX86M;
1869       }
1870       break;
1871 
1872     case InstDB::kEncodingX86EnqcmdMovdir64b:
1873       if (isign3 == ENC_OPS2(Mem, Mem)) {
1874         const Mem& m0 = o0.as<Mem>();
        // This is the only validation required here; the rest is handled afterwards.
1876         if (ASMJIT_UNLIKELY(m0.baseType() != o1.as<Mem>().baseType() ||
1877                             m0.hasIndex() ||
1878                             m0.hasOffset() ||
1879                             (m0.hasSegment() && m0.segmentId() != SReg::kIdEs)))
1880           goto InvalidInstruction;
1881 
        // The first memory operand is passed via its base register (ModRM.reg);
        // the second memory operand is the regular RM operand.
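        // For example, in Intel syntax 'movdir64b rax, [rsi]' copies 64 bytes
        // from [rsi] to the address held in RAX; the destination register goes
        // into ModRM.reg and the source memory operand into ModRM.rm.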
1883         opReg = o0.as<Mem>().baseId();
1884         rmRel = &o1;
1885         goto EmitX86M;
1886       }
1887       break;
1888 
1889     case InstDB::kEncodingX86Out:
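      // OUT has two forms: 'out imm8, al|ax|eax' (opcodes 0xE6/0xE7, the port is
      // an 8-bit immediate) and 'out dx, al|ax|eax' (opcodes 0xEE/0xEF). In both
      // cases the +1 opcode is used when the accumulator is wider than 8 bits.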
1890       if (isign3 == ENC_OPS2(Imm, Reg)) {
1891         if (ASMJIT_UNLIKELY(o1.id() != Gp::kIdAx))
1892           goto InvalidInstruction;
1893 
1894         opcode = x86AltOpcodeOf(instInfo) + (o1.size() != 1);
1895         opcode.add66hBySize(o1.size());
1896 
1897         immValue = o0.as<Imm>().valueAs<uint8_t>();
1898         immSize = 1;
1899         goto EmitX86Op;
1900       }
1901 
1902       if (isign3 == ENC_OPS2(Reg, Reg)) {
1903         if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdDx || o1.id() != Gp::kIdAx))
1904           goto InvalidInstruction;
1905 
1906         opcode.add(o1.size() != 1);
1907         opcode.add66hBySize(o1.size());
1908         goto EmitX86Op;
1909       }
1910       break;
1911 
1912     case InstDB::kEncodingX86Outs:
1913       if (isign3 == ENC_OPS2(Reg, Mem)) {
1914         if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdDx || !x86IsImplicitMem(o1, Gp::kIdSi)))
1915           goto InvalidInstruction;
1916 
1917         uint32_t size = o1.size();
1918         if (ASMJIT_UNLIKELY(size == 0))
1919           goto AmbiguousOperandSize;
1920 
1921         rmRel = &o1;
1922         opcode.add(size != 1);
1923         opcode.add66hBySize(size);
1924         goto EmitX86OpImplicitMem;
1925       }
1926       break;
1927 
1928     case InstDB::kEncodingX86Push:
1929       if (isign3 == ENC_OPS1(Reg)) {
1930         if (Reg::isSReg(o0)) {
1931           uint32_t segment = o0.id();
1932           if (ASMJIT_UNLIKELY(segment >= SReg::kIdCount))
1933             goto InvalidSegment;
1934 
1935           opcode = x86OpcodePushSReg[segment];
1936           goto EmitX86Op;
1937         }
1938         else {
1939           goto CaseX86PushPop_Gp;
1940         }
1941       }
1942 
1943       if (isign3 == ENC_OPS1(Imm)) {
1944         immValue = o0.as<Imm>().value();
1945         immSize = 4;
1946 
1947         if (Support::isInt8(immValue) && !(options & Inst::kOptionLongForm))
1948           immSize = 1;
1949 
1950         opcode = immSize == 1 ? 0x6A : 0x68;
1951         goto EmitX86Op;
1952       }
1953       ASMJIT_FALLTHROUGH;
1954 
1955     case InstDB::kEncodingX86Pop:
1956       if (isign3 == ENC_OPS1(Reg)) {
1957         if (Reg::isSReg(o0)) {
1958           uint32_t segment = o0.id();
1959           if (ASMJIT_UNLIKELY(segment == SReg::kIdCs || segment >= SReg::kIdCount))
1960             goto InvalidSegment;
1961 
1962           opcode = x86OpcodePopSReg[segment];
1963           goto EmitX86Op;
1964         }
1965         else {
1966 CaseX86PushPop_Gp:
          // We allow 2-byte, 4-byte, and 8-byte register sizes, although PUSH
          // and POP only allow 2 bytes or the native size. In 64-bit mode we
          // simply PUSH/POP the 64-bit register even if a 32-bit register was
          // given.
1970           if (ASMJIT_UNLIKELY(o0.size() < 2))
1971             goto InvalidInstruction;
1972 
1973           opcode = x86AltOpcodeOf(instInfo);
1974           opcode.add66hBySize(o0.size());
1975           opReg = o0.id();
1976           goto EmitX86OpReg;
1977         }
1978       }
1979 
1980       if (isign3 == ENC_OPS1(Mem)) {
1981         if (ASMJIT_UNLIKELY(o0.size() == 0))
1982           goto AmbiguousOperandSize;
1983 
1984         if (ASMJIT_UNLIKELY(o0.size() != 2 && o0.size() != registerSize()))
1985           goto InvalidInstruction;
1986 
1987         opcode.add66hBySize(o0.size());
1988         rmRel = &o0;
1989         goto EmitX86M;
1990       }
1991       break;
1992 
1993     case InstDB::kEncodingX86Ret:
1994       if (isign3 == 0) {
1995         // 'ret' without immediate, change C2 to C3.
1996         opcode.add(1);
1997         goto EmitX86Op;
1998       }
1999 
2000       if (isign3 == ENC_OPS1(Imm)) {
2001         immValue = o0.as<Imm>().value();
2002         if (immValue == 0 && !(options & Inst::kOptionLongForm)) {
2003           // 'ret' without immediate, change C2 to C3.
2004           opcode.add(1);
2005           goto EmitX86Op;
2006         }
2007         else {
2008           immSize = 2;
2009           goto EmitX86Op;
2010         }
2011       }
2012       break;
2013 
2014     case InstDB::kEncodingX86Rot:
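      // Shift/rotate group encoding: the base opcode is the 'shift by 1' form
      // (0xD0/0xD1), adding 2 selects the 'shift by CL' form (0xD2/0xD3), and
      // subtracting 0x10 selects the 'shift by imm8' form (0xC0/0xC1).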
2015       if (o0.isReg()) {
2016         opcode.addArithBySize(o0.size());
2017         rbReg = o0.id();
2018 
2019         if (o0.size() == 1)
2020           FIXUP_GPB(o0, rbReg);
2021 
2022         if (isign3 == ENC_OPS2(Reg, Reg)) {
2023           if (ASMJIT_UNLIKELY(o1.id() != Gp::kIdCx))
2024             goto InvalidInstruction;
2025 
2026           opcode += 2;
2027           goto EmitX86R;
2028         }
2029 
2030         if (isign3 == ENC_OPS2(Reg, Imm)) {
2031           immValue = o1.as<Imm>().value() & 0xFF;
2032           immSize = 0;
2033 
2034           if (immValue == 1 && !(options & Inst::kOptionLongForm))
2035             goto EmitX86R;
2036 
2037           opcode -= 0x10;
2038           immSize = 1;
2039           goto EmitX86R;
2040         }
2041       }
2042       else {
2043         if (ASMJIT_UNLIKELY(o0.size() == 0))
2044           goto AmbiguousOperandSize;
2045         opcode.addArithBySize(o0.size());
2046 
2047         if (isign3 == ENC_OPS2(Mem, Reg)) {
2048           if (ASMJIT_UNLIKELY(o1.id() != Gp::kIdCx))
2049             goto InvalidInstruction;
2050 
2051           opcode += 2;
2052           rmRel = &o0;
2053           goto EmitX86M;
2054         }
2055 
2056         if (isign3 == ENC_OPS2(Mem, Imm)) {
2057           rmRel = &o0;
2058           immValue = o1.as<Imm>().value() & 0xFF;
2059           immSize = 0;
2060 
2061           if (immValue == 1 && !(options & Inst::kOptionLongForm))
2062             goto EmitX86M;
2063 
2064           opcode -= 0x10;
2065           immSize = 1;
2066           goto EmitX86M;
2067         }
2068       }
2069       break;
2070 
2071     case InstDB::kEncodingX86Set:
2072       if (isign3 == ENC_OPS1(Reg)) {
2073         rbReg = o0.id();
2074         FIXUP_GPB(o0, rbReg);
2075         goto EmitX86R;
2076       }
2077 
2078       if (isign3 == ENC_OPS1(Mem)) {
2079         rmRel = &o0;
2080         goto EmitX86M;
2081       }
2082       break;
2083 
2084     case InstDB::kEncodingX86ShldShrd:
2085       if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
2086         opcode.addPrefixBySize(o0.size());
2087         opReg = o1.id();
2088         rbReg = o0.id();
2089 
2090         immValue = o2.as<Imm>().value();
2091         immSize = 1;
2092         goto EmitX86R;
2093       }
2094 
2095       if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
2096         opcode.addPrefixBySize(o1.size());
2097         opReg = o1.id();
2098         rmRel = &o0;
2099 
2100         immValue = o2.as<Imm>().value();
2101         immSize = 1;
2102         goto EmitX86M;
2103       }
2104 
2105       // The following instructions use opcode + 1.
2106       opcode.add(1);
2107 
2108       if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
2109         if (ASMJIT_UNLIKELY(o2.id() != Gp::kIdCx))
2110           goto InvalidInstruction;
2111 
2112         opcode.addPrefixBySize(o0.size());
2113         opReg = o1.id();
2114         rbReg = o0.id();
2115         goto EmitX86R;
2116       }
2117 
2118       if (isign3 == ENC_OPS3(Mem, Reg, Reg)) {
2119         if (ASMJIT_UNLIKELY(o2.id() != Gp::kIdCx))
2120           goto InvalidInstruction;
2121 
2122         opcode.addPrefixBySize(o1.size());
2123         opReg = o1.id();
2124         rmRel = &o0;
2125         goto EmitX86M;
2126       }
2127       break;
2128 
2129     case InstDB::kEncodingX86StrRm:
2130       if (isign3 == ENC_OPS2(Reg, Mem)) {
2131         rmRel = &o1;
2132         if (ASMJIT_UNLIKELY(rmRel->as<Mem>().offsetLo32() || !Reg::isGp(o0.as<Reg>(), Gp::kIdAx)))
2133           goto InvalidInstruction;
2134 
2135         uint32_t size = o0.size();
2136         if (o1.hasSize() && ASMJIT_UNLIKELY(o1.size() != size))
2137           goto OperandSizeMismatch;
2138 
2139         opcode.addArithBySize(size);
2140         goto EmitX86OpImplicitMem;
2141       }
2142       break;
2143 
2144     case InstDB::kEncodingX86StrMr:
2145       if (isign3 == ENC_OPS2(Mem, Reg)) {
2146         rmRel = &o0;
2147         if (ASMJIT_UNLIKELY(rmRel->as<Mem>().offsetLo32() || !Reg::isGp(o1.as<Reg>(), Gp::kIdAx)))
2148           goto InvalidInstruction;
2149 
2150         uint32_t size = o1.size();
2151         if (o0.hasSize() && ASMJIT_UNLIKELY(o0.size() != size))
2152           goto OperandSizeMismatch;
2153 
2154         opcode.addArithBySize(size);
2155         goto EmitX86OpImplicitMem;
2156       }
2157       break;
2158 
2159     case InstDB::kEncodingX86StrMm:
2160       if (isign3 == ENC_OPS2(Mem, Mem)) {
2161         if (ASMJIT_UNLIKELY(o0.as<Mem>().baseAndIndexTypes() !=
2162                             o1.as<Mem>().baseAndIndexTypes()))
2163           goto InvalidInstruction;
2164 
2165         rmRel = &o1;
2166         if (ASMJIT_UNLIKELY(o0.as<Mem>().hasOffset()))
2167           goto InvalidInstruction;
2168 
2169         uint32_t size = o1.size();
2170         if (ASMJIT_UNLIKELY(size == 0))
2171           goto AmbiguousOperandSize;
2172 
2173         if (ASMJIT_UNLIKELY(o0.size() != size))
2174           goto OperandSizeMismatch;
2175 
2176         opcode.addArithBySize(size);
2177         goto EmitX86OpImplicitMem;
2178       }
2179       break;
2180 
2181     case InstDB::kEncodingX86Test:
2182       if (isign3 == ENC_OPS2(Reg, Reg)) {
2183         if (o0.size() != o1.size())
2184           goto OperandSizeMismatch;
2185 
2186         opcode.addArithBySize(o0.size());
2187         rbReg = o0.id();
2188         opReg = o1.id();
2189 
2190         if (o0.size() != 1)
2191           goto EmitX86R;
2192 
2193         FIXUP_GPB(o0, rbReg);
2194         FIXUP_GPB(o1, opReg);
2195         goto EmitX86R;
2196       }
2197 
2198       if (isign3 == ENC_OPS2(Mem, Reg)) {
2199         opcode.addArithBySize(o1.size());
2200         opReg = o1.id();
2201         rmRel = &o0;
2202 
2203         if (o1.size() != 1)
2204           goto EmitX86M;
2205 
2206         FIXUP_GPB(o1, opReg);
2207         goto EmitX86M;
2208       }
2209 
2210       // The following instructions use the secondary opcode.
2211       opcode = x86AltOpcodeOf(instInfo);
2212       opReg = opcode.extractModO();
2213 
2214       if (isign3 == ENC_OPS2(Reg, Imm)) {
2215         opcode.addArithBySize(o0.size());
2216         rbReg = o0.id();
2217 
2218         if (o0.size() == 1) {
2219           FIXUP_GPB(o0, rbReg);
2220           immValue = o1.as<Imm>().valueAs<uint8_t>();
2221           immSize = 1;
2222         }
2223         else {
2224           immValue = o1.as<Imm>().value();
2225           immSize = FastUInt8(Support::min<uint32_t>(o0.size(), 4));
2226         }
2227 
2228         // Short form - AL, AX, EAX, RAX.
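        // For example 'test al, imm8' is encoded as 0xA8 ib and
        // 'test eax, imm32' as 0xA9 id, which saves the ModR/M byte.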
2229         if (rbReg == 0 && !(options & Inst::kOptionLongForm)) {
2230           opcode &= Opcode::kPP_66 | Opcode::kW;
2231           opcode |= 0xA8 + (o0.size() != 1);
2232           goto EmitX86Op;
2233         }
2234 
2235         goto EmitX86R;
2236       }
2237 
2238       if (isign3 == ENC_OPS2(Mem, Imm)) {
2239         if (ASMJIT_UNLIKELY(o0.size() == 0))
2240           goto AmbiguousOperandSize;
2241 
2242         opcode.addArithBySize(o0.size());
2243         rmRel = &o0;
2244 
2245         immValue = o1.as<Imm>().value();
2246         immSize = FastUInt8(Support::min<uint32_t>(o0.size(), 4));
2247         goto EmitX86M;
2248       }
2249       break;
2250 
2251     case InstDB::kEncodingX86Xchg:
2252       if (isign3 == ENC_OPS2(Reg, Mem)) {
2253         opcode.addArithBySize(o0.size());
2254         opReg = o0.id();
2255         rmRel = &o1;
2256 
2257         if (o0.size() != 1)
2258           goto EmitX86M;
2259 
2260         FIXUP_GPB(o0, opReg);
2261         goto EmitX86M;
2262       }
2263       ASMJIT_FALLTHROUGH;
2264 
2265     case InstDB::kEncodingX86Xadd:
2266       if (isign3 == ENC_OPS2(Reg, Reg)) {
2267         rbReg = o0.id();
2268         opReg = o1.id();
2269 
2270         uint32_t opSize = o0.size();
2271         if (opSize != o1.size())
2272           goto OperandSizeMismatch;
2273 
2274         if (opSize == 1) {
2275           FIXUP_GPB(o0, rbReg);
2276           FIXUP_GPB(o1, opReg);
2277           goto EmitX86R;
2278         }
2279 
2280         // Special cases for 'xchg ?ax, reg'.
2281         if (instId == Inst::kIdXchg && (opReg == 0 || rbReg == 0)) {
2282           if (is64Bit() && opReg == rbReg && opSize >= 4) {
2283             if (opSize == 8) {
2284               // Encode 'xchg rax, rax' as '90' (REX and other prefixes are optional).
2285               opcode &= Opcode::kW;
2286               opcode |= 0x90;
2287               goto EmitX86OpReg;
2288             }
2289             else {
              // Encode 'xchg eax, eax' by using the generic path - the short
              // 0x90 form is NOP in 64-bit mode and would not zero-extend the
              // destination.
2291             }
2292           }
2293           else if (!(options & Inst::kOptionLongForm)) {
            // The short form encodes only one register - the non-zero one;
            // the other operand is the accumulator.
2295             opReg += rbReg;
2296 
2297             opcode.addArithBySize(opSize);
2298             opcode &= Opcode::kW | Opcode::kPP_66;
2299             opcode |= 0x90;
2300             goto EmitX86OpReg;
2301           }
2302         }
2303 
2304         opcode.addArithBySize(opSize);
2305         goto EmitX86R;
2306       }
2307 
2308       if (isign3 == ENC_OPS2(Mem, Reg)) {
2309         opcode.addArithBySize(o1.size());
2310         opReg = o1.id();
2311         rmRel = &o0;
2312 
2313         if (o1.size() == 1) {
2314           FIXUP_GPB(o1, opReg);
2315         }
2316 
2317         goto EmitX86M;
2318       }
2319       break;
2320 
2321     case InstDB::kEncodingX86Fence:
2322       rbReg = 0;
2323       goto EmitX86R;
2324 
2325     case InstDB::kEncodingX86Bndmov:
2326       if (isign3 == ENC_OPS2(Reg, Reg)) {
2327         opReg = o0.id();
2328         rbReg = o1.id();
2329 
2330         // ModRM encoding:
2331         if (!(options & Inst::kOptionModMR))
2332           goto EmitX86R;
2333 
2334         // ModMR encoding:
2335         opcode = x86AltOpcodeOf(instInfo);
2336         std::swap(opReg, rbReg);
2337         goto EmitX86R;
2338       }
2339 
2340       if (isign3 == ENC_OPS2(Reg, Mem)) {
2341         opReg = o0.id();
2342         rmRel = &o1;
2343         goto EmitX86M;
2344       }
2345 
2346       if (isign3 == ENC_OPS2(Mem, Reg)) {
2347         opcode = x86AltOpcodeOf(instInfo);
2348 
2349         rmRel = &o0;
2350         opReg = o1.id();
2351         goto EmitX86M;
2352       }
2353       break;
2354 
2355     // ------------------------------------------------------------------------
2356     // [FPU]
2357     // ------------------------------------------------------------------------
2358 
2359     case InstDB::kEncodingFpuOp:
2360       goto EmitFpuOp;
2361 
2362     case InstDB::kEncodingFpuArith:
2363       if (isign3 == ENC_OPS2(Reg, Reg)) {
2364         opReg = o0.id();
2365         rbReg = o1.id();
2366 
2367         // We switch to the alternative opcode if the first operand is zero.
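        // For example 'fadd st0, st(i)' encodes as D8 C0+i, while
        // 'fadd st(i), st0' encodes as DC C0+i - the same low opcode byte with a
        // different leading byte, which is what the selection below implements.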
2368         if (opReg == 0) {
2369 CaseFpuArith_Reg:
2370           opcode = ((0xD8   << Opcode::kFPU_2B_Shift)       ) +
2371                    ((opcode >> Opcode::kFPU_2B_Shift) & 0xFF) + rbReg;
2372           goto EmitFpuOp;
2373         }
2374         else if (rbReg == 0) {
2375           rbReg = opReg;
2376           opcode = ((0xDC   << Opcode::kFPU_2B_Shift)       ) +
2377                    ((opcode                         ) & 0xFF) + rbReg;
2378           goto EmitFpuOp;
2379         }
2380         else {
2381           goto InvalidInstruction;
2382         }
2383       }
2384 
2385       if (isign3 == ENC_OPS1(Mem)) {
2386 CaseFpuArith_Mem:
        // 0xD8/0xDC, depending on the size of the memory operand; opReg is valid.
2388         opcode = (o0.size() == 4) ? 0xD8 : 0xDC;
2389         // Clear compressed displacement before going to EmitX86M.
2390         opcode &= ~uint32_t(Opcode::kCDSHL_Mask);
2391 
2392         rmRel = &o0;
2393         goto EmitX86M;
2394       }
2395       break;
2396 
2397     case InstDB::kEncodingFpuCom:
2398       if (isign3 == 0) {
2399         rbReg = 1;
2400         goto CaseFpuArith_Reg;
2401       }
2402 
2403       if (isign3 == ENC_OPS1(Reg)) {
2404         rbReg = o0.id();
2405         goto CaseFpuArith_Reg;
2406       }
2407 
2408       if (isign3 == ENC_OPS1(Mem)) {
2409         goto CaseFpuArith_Mem;
2410       }
2411       break;
2412 
2413     case InstDB::kEncodingFpuFldFst:
2414       if (isign3 == ENC_OPS1(Mem)) {
2415         rmRel = &o0;
2416 
2417         if (o0.size() == 4 && commonInfo->hasFlag(InstDB::kFlagFpuM32)) {
2418           goto EmitX86M;
2419         }
2420 
2421         if (o0.size() == 8 && commonInfo->hasFlag(InstDB::kFlagFpuM64)) {
2422           opcode += 4;
2423           goto EmitX86M;
2424         }
2425 
2426         if (o0.size() == 10 && commonInfo->hasFlag(InstDB::kFlagFpuM80)) {
2427           opcode = x86AltOpcodeOf(instInfo);
2428           opReg  = opcode.extractModO();
2429           goto EmitX86M;
2430         }
2431       }
2432 
2433       if (isign3 == ENC_OPS1(Reg)) {
2434         if (instId == Inst::kIdFld ) { opcode = (0xD9 << Opcode::kFPU_2B_Shift) + 0xC0 + o0.id(); goto EmitFpuOp; }
2435         if (instId == Inst::kIdFst ) { opcode = (0xDD << Opcode::kFPU_2B_Shift) + 0xD0 + o0.id(); goto EmitFpuOp; }
2436         if (instId == Inst::kIdFstp) { opcode = (0xDD << Opcode::kFPU_2B_Shift) + 0xD8 + o0.id(); goto EmitFpuOp; }
2437       }
2438       break;
2439 
2440     case InstDB::kEncodingFpuM:
2441       if (isign3 == ENC_OPS1(Mem)) {
2442         // Clear compressed displacement before going to EmitX86M.
2443         opcode &= ~uint32_t(Opcode::kCDSHL_Mask);
2444 
2445         rmRel = &o0;
2446         if (o0.size() == 2 && commonInfo->hasFlag(InstDB::kFlagFpuM16)) {
2447           opcode += 4;
2448           goto EmitX86M;
2449         }
2450 
2451         if (o0.size() == 4 && commonInfo->hasFlag(InstDB::kFlagFpuM32)) {
2452           goto EmitX86M;
2453         }
2454 
2455         if (o0.size() == 8 && commonInfo->hasFlag(InstDB::kFlagFpuM64)) {
2456           opcode = x86AltOpcodeOf(instInfo) & ~uint32_t(Opcode::kCDSHL_Mask);
2457           opReg  = opcode.extractModO();
2458           goto EmitX86M;
2459         }
2460       }
2461       break;
2462 
2463     case InstDB::kEncodingFpuRDef:
2464       if (isign3 == 0) {
2465         opcode += 1;
2466         goto EmitFpuOp;
2467       }
2468       ASMJIT_FALLTHROUGH;
2469 
2470     case InstDB::kEncodingFpuR:
2471       if (isign3 == ENC_OPS1(Reg)) {
2472         opcode += o0.id();
2473         goto EmitFpuOp;
2474       }
2475       break;
2476 
2477     case InstDB::kEncodingFpuStsw:
2478       if (isign3 == ENC_OPS1(Reg)) {
2479         if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdAx))
2480           goto InvalidInstruction;
2481 
2482         opcode = x86AltOpcodeOf(instInfo);
2483         goto EmitFpuOp;
2484       }
2485 
2486       if (isign3 == ENC_OPS1(Mem)) {
2487         // Clear compressed displacement before going to EmitX86M.
2488         opcode &= ~uint32_t(Opcode::kCDSHL_Mask);
2489 
2490         rmRel = &o0;
2491         goto EmitX86M;
2492       }
2493       break;
2494 
2495     // ------------------------------------------------------------------------
2496     // [Ext]
2497     // ------------------------------------------------------------------------
2498 
2499     case InstDB::kEncodingExtPextrw:
2500       if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
2501         opcode.add66hIf(Reg::isXmm(o1));
2502 
2503         immValue = o2.as<Imm>().value();
2504         immSize = 1;
2505 
2506         opReg = o0.id();
2507         rbReg = o1.id();
2508         goto EmitX86R;
2509       }
2510 
2511       if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
2512         // Secondary opcode of 'pextrw' instruction (SSE4.1).
2513         opcode = x86AltOpcodeOf(instInfo);
2514         opcode.add66hIf(Reg::isXmm(o1));
2515 
2516         immValue = o2.as<Imm>().value();
2517         immSize = 1;
2518 
2519         opReg = o1.id();
2520         rmRel = &o0;
2521         goto EmitX86M;
2522       }
2523       break;
2524 
2525     case InstDB::kEncodingExtExtract:
2526       if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
2527         opcode.add66hIf(Reg::isXmm(o1));
2528 
2529         immValue = o2.as<Imm>().value();
2530         immSize = 1;
2531 
2532         opReg = o1.id();
2533         rbReg = o0.id();
2534         goto EmitX86R;
2535       }
2536 
2537       if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
2538         opcode.add66hIf(Reg::isXmm(o1));
2539 
2540         immValue = o2.as<Imm>().value();
2541         immSize = 1;
2542 
2543         opReg = o1.id();
2544         rmRel = &o0;
2545         goto EmitX86M;
2546       }
2547       break;
2548 
2549     case InstDB::kEncodingExtMov:
2550       // GP|MM|XMM <- GP|MM|XMM
2551       if (isign3 == ENC_OPS2(Reg, Reg)) {
2552         opReg = o0.id();
2553         rbReg = o1.id();
2554 
2555         if (!(options & Inst::kOptionModMR) || !instInfo->_altOpcodeIndex)
2556           goto EmitX86R;
2557 
2558         opcode = x86AltOpcodeOf(instInfo);
2559         std::swap(opReg, rbReg);
2560         goto EmitX86R;
2561       }
2562 
2563       // GP|MM|XMM <- Mem
2564       if (isign3 == ENC_OPS2(Reg, Mem)) {
2565         opReg = o0.id();
2566         rmRel = &o1;
2567         goto EmitX86M;
2568       }
2569 
2570       // The following instruction uses opcode[1].
2571       opcode = x86AltOpcodeOf(instInfo);
2572 
2573       // Mem <- GP|MM|XMM
2574       if (isign3 == ENC_OPS2(Mem, Reg)) {
2575         opReg = o1.id();
2576         rmRel = &o0;
2577         goto EmitX86M;
2578       }
2579       break;
2580 
2581     case InstDB::kEncodingExtMovbe:
2582       if (isign3 == ENC_OPS2(Reg, Mem)) {
2583         if (o0.size() == 1)
2584           goto InvalidInstruction;
2585 
2586         opcode.addPrefixBySize(o0.size());
2587         opReg = o0.id();
2588         rmRel = &o1;
2589         goto EmitX86M;
2590       }
2591 
2592       // The following instruction uses the secondary opcode.
2593       opcode = x86AltOpcodeOf(instInfo);
2594 
2595       if (isign3 == ENC_OPS2(Mem, Reg)) {
2596         if (o1.size() == 1)
2597           goto InvalidInstruction;
2598 
2599         opcode.addPrefixBySize(o1.size());
2600         opReg = o1.id();
2601         rmRel = &o0;
2602         goto EmitX86M;
2603       }
2604       break;
2605 
2606     case InstDB::kEncodingExtMovd:
2607 CaseExtMovd:
2608       opReg = o0.id();
2609       opcode.add66hIf(Reg::isXmm(o0));
2610 
2611       // MM/XMM <- Gp
2612       if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o1)) {
2613         rbReg = o1.id();
2614         goto EmitX86R;
2615       }
2616 
2617       // MM/XMM <- Mem
2618       if (isign3 == ENC_OPS2(Reg, Mem)) {
2619         rmRel = &o1;
2620         goto EmitX86M;
2621       }
2622 
2623       // The following instructions use the secondary opcode.
2624       opcode &= Opcode::kW;
2625       opcode |= x86AltOpcodeOf(instInfo);
2626       opReg = o1.id();
2627       opcode.add66hIf(Reg::isXmm(o1));
2628 
2629       // GP <- MM/XMM
2630       if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o0)) {
2631         rbReg = o0.id();
2632         goto EmitX86R;
2633       }
2634 
2635       // Mem <- MM/XMM
2636       if (isign3 == ENC_OPS2(Mem, Reg)) {
2637         rmRel = &o0;
2638         goto EmitX86M;
2639       }
2640       break;
2641 
2642     case InstDB::kEncodingExtMovq:
2643       if (isign3 == ENC_OPS2(Reg, Reg)) {
2644         opReg = o0.id();
2645         rbReg = o1.id();
2646 
2647         // MM <- MM
2648         if (Reg::isMm(o0) && Reg::isMm(o1)) {
2649           opcode = Opcode::k000F00 | 0x6F;
2650 
2651           if (!(options & Inst::kOptionModMR))
2652             goto EmitX86R;
2653 
2654           opcode += 0x10;
2655           std::swap(opReg, rbReg);
2656           goto EmitX86R;
2657         }
2658 
2659         // XMM <- XMM
2660         if (Reg::isXmm(o0) && Reg::isXmm(o1)) {
2661           opcode = Opcode::kF30F00 | 0x7E;
2662 
2663           if (!(options & Inst::kOptionModMR))
2664             goto EmitX86R;
2665 
2666           opcode = Opcode::k660F00 | 0xD6;
2667           std::swap(opReg, rbReg);
2668           goto EmitX86R;
2669         }
2670       }
2671 
2672       if (isign3 == ENC_OPS2(Reg, Mem)) {
2673         opReg = o0.id();
2674         rmRel = &o1;
2675 
2676         // MM <- Mem
2677         if (Reg::isMm(o0)) {
2678           opcode = Opcode::k000F00 | 0x6F;
2679           goto EmitX86M;
2680         }
2681 
2682         // XMM <- Mem
2683         if (Reg::isXmm(o0)) {
2684           opcode = Opcode::kF30F00 | 0x7E;
2685           goto EmitX86M;
2686         }
2687       }
2688 
2689       if (isign3 == ENC_OPS2(Mem, Reg)) {
2690         opReg = o1.id();
2691         rmRel = &o0;
2692 
2693         // Mem <- MM
2694         if (Reg::isMm(o1)) {
2695           opcode = Opcode::k000F00 | 0x7F;
2696           goto EmitX86M;
2697         }
2698 
2699         // Mem <- XMM
2700         if (Reg::isXmm(o1)) {
2701           opcode = Opcode::k660F00 | 0xD6;
2702           goto EmitX86M;
2703         }
2704       }
2705 
2706       // MOVQ in other case is simply a MOVD instruction promoted to 64-bit.
2707       opcode |= Opcode::kW;
2708       goto CaseExtMovd;
2709 
2710     case InstDB::kEncodingExtRm_XMM0:
2711       if (ASMJIT_UNLIKELY(!o2.isNone() && !Reg::isXmm(o2, 0)))
2712         goto InvalidInstruction;
2713 
2714       isign3 &= 0x3F;
2715       goto CaseExtRm;
2716 
2717     case InstDB::kEncodingExtRm_ZDI:
2718       if (ASMJIT_UNLIKELY(!o2.isNone() && !x86IsImplicitMem(o2, Gp::kIdDi)))
2719         goto InvalidInstruction;
2720 
2721       isign3 &= 0x3F;
2722       goto CaseExtRm;
2723 
2724     case InstDB::kEncodingExtRm_Wx:
2725       opcode.addWIf(o1.size() == 8);
2726       ASMJIT_FALLTHROUGH;
2727 
2728     case InstDB::kEncodingExtRm_Wx_GpqOnly:
2729       opcode.addWIf(Reg::isGpq(o0));
2730       ASMJIT_FALLTHROUGH;
2731 
2732     case InstDB::kEncodingExtRm:
2733 CaseExtRm:
2734       if (isign3 == ENC_OPS2(Reg, Reg)) {
2735         opReg = o0.id();
2736         rbReg = o1.id();
2737         goto EmitX86R;
2738       }
2739 
2740       if (isign3 == ENC_OPS2(Reg, Mem)) {
2741         opReg = o0.id();
2742         rmRel = &o1;
2743         goto EmitX86M;
2744       }
2745       break;
2746 
2747     case InstDB::kEncodingExtRm_P:
2748       if (isign3 == ENC_OPS2(Reg, Reg)) {
2749         opcode.add66hIf(Reg::isXmm(o0) | Reg::isXmm(o1));
2750 
2751         opReg = o0.id();
2752         rbReg = o1.id();
2753         goto EmitX86R;
2754       }
2755 
2756       if (isign3 == ENC_OPS2(Reg, Mem)) {
2757         opcode.add66hIf(Reg::isXmm(o0));
2758 
2759         opReg = o0.id();
2760         rmRel = &o1;
2761         goto EmitX86M;
2762       }
2763       break;
2764 
2765     case InstDB::kEncodingExtRmRi:
2766       if (isign3 == ENC_OPS2(Reg, Reg)) {
2767         opReg = o0.id();
2768         rbReg = o1.id();
2769         goto EmitX86R;
2770       }
2771 
2772       if (isign3 == ENC_OPS2(Reg, Mem)) {
2773         opReg = o0.id();
2774         rmRel = &o1;
2775         goto EmitX86M;
2776       }
2777 
2778       // The following instruction uses the secondary opcode.
2779       opcode = x86AltOpcodeOf(instInfo);
2780       opReg  = opcode.extractModO();
2781 
2782       if (isign3 == ENC_OPS2(Reg, Imm)) {
2783         immValue = o1.as<Imm>().value();
2784         immSize = 1;
2785 
2786         rbReg = o0.id();
2787         goto EmitX86R;
2788       }
2789       break;
2790 
2791     case InstDB::kEncodingExtRmRi_P:
2792       if (isign3 == ENC_OPS2(Reg, Reg)) {
2793         opcode.add66hIf(Reg::isXmm(o0) | Reg::isXmm(o1));
2794 
2795         opReg = o0.id();
2796         rbReg = o1.id();
2797         goto EmitX86R;
2798       }
2799 
2800       if (isign3 == ENC_OPS2(Reg, Mem)) {
2801         opcode.add66hIf(Reg::isXmm(o0));
2802 
2803         opReg = o0.id();
2804         rmRel = &o1;
2805         goto EmitX86M;
2806       }
2807 
2808       // The following instruction uses the secondary opcode.
2809       opcode = x86AltOpcodeOf(instInfo);
2810       opReg  = opcode.extractModO();
2811 
2812       if (isign3 == ENC_OPS2(Reg, Imm)) {
2813         opcode.add66hIf(Reg::isXmm(o0));
2814 
2815         immValue = o1.as<Imm>().value();
2816         immSize = 1;
2817 
2818         rbReg = o0.id();
2819         goto EmitX86R;
2820       }
2821       break;
2822 
2823     case InstDB::kEncodingExtRmi:
2824       immValue = o2.as<Imm>().value();
2825       immSize = 1;
2826 
2827       if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
2828         opReg = o0.id();
2829         rbReg = o1.id();
2830         goto EmitX86R;
2831       }
2832 
2833       if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
2834         opReg = o0.id();
2835         rmRel = &o1;
2836         goto EmitX86M;
2837       }
2838       break;
2839 
2840     case InstDB::kEncodingExtRmi_P:
2841       immValue = o2.as<Imm>().value();
2842       immSize = 1;
2843 
2844       if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
2845         opcode.add66hIf(Reg::isXmm(o0) | Reg::isXmm(o1));
2846 
2847         opReg = o0.id();
2848         rbReg = o1.id();
2849         goto EmitX86R;
2850       }
2851 
2852       if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
2853         opcode.add66hIf(Reg::isXmm(o0));
2854 
2855         opReg = o0.id();
2856         rmRel = &o1;
2857         goto EmitX86M;
2858       }
2859       break;
2860 
2861     // ------------------------------------------------------------------------
2862     // [Extrq / Insertq (SSE4A)]
2863     // ------------------------------------------------------------------------
2864 
2865     case InstDB::kEncodingExtExtrq:
2866       opReg = o0.id();
2867       rbReg = o1.id();
2868 
2869       if (isign3 == ENC_OPS2(Reg, Reg))
2870         goto EmitX86R;
2871 
2872       if (isign3 == ENC_OPS3(Reg, Imm, Imm)) {
2873         // This variant of the instruction uses the secondary opcode.
2874         opcode = x86AltOpcodeOf(instInfo);
2875         rbReg = opReg;
2876         opReg = opcode.extractModO();
2877 
2878         immValue = (uint32_t(o1.as<Imm>().valueAs<uint8_t>())     ) +
2879                    (uint32_t(o2.as<Imm>().valueAs<uint8_t>()) << 8) ;
2880         immSize = 2;
2881         goto EmitX86R;
2882       }
2883       break;
2884 
2885     case InstDB::kEncodingExtInsertq: {
2886       const Operand_& o3 = opExt[EmitterUtils::kOp3];
2887       const uint32_t isign4 = isign3 + (o3.opType() << 9);
2888 
2889       opReg = o0.id();
2890       rbReg = o1.id();
2891 
2892       if (isign4 == ENC_OPS2(Reg, Reg))
2893         goto EmitX86R;
2894 
2895       if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) {
2896         // This variant of the instruction uses the secondary opcode.
2897         opcode = x86AltOpcodeOf(instInfo);
2898 
2899         immValue = (uint32_t(o2.as<Imm>().valueAs<uint8_t>())     ) +
2900                    (uint32_t(o3.as<Imm>().valueAs<uint8_t>()) << 8) ;
2901         immSize = 2;
2902         goto EmitX86R;
2903       }
2904       break;
2905     }
2906 
2907     // ------------------------------------------------------------------------
2908     // [3dNow]
2909     // ------------------------------------------------------------------------
2910 
2911     case InstDB::kEncodingExt3dNow:
      // Every 3dNow instruction starts with 0x0F0F and the actual opcode is
      // stored as an 8-bit immediate that is emitted after the operands.
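      // For example 'pfadd mm, mm/m64' is documented as 0F 0F /r 9E - the 0x9E
      // suffix byte travels through the immediate path below.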
2914       immValue = opcode.v & 0xFFu;
2915       immSize = 1;
2916 
2917       opcode = Opcode::k000F00 | 0x0F;
2918       opReg = o0.id();
2919 
2920       if (isign3 == ENC_OPS2(Reg, Reg)) {
2921         rbReg = o1.id();
2922         goto EmitX86R;
2923       }
2924 
2925       if (isign3 == ENC_OPS2(Reg, Mem)) {
2926         rmRel = &o1;
2927         goto EmitX86M;
2928       }
2929       break;
2930 
2931     // ------------------------------------------------------------------------
2932     // [VEX/EVEX]
2933     // ------------------------------------------------------------------------
2934 
2935     case InstDB::kEncodingVexOp:
2936       goto EmitVexEvexOp;
2937 
2938     case InstDB::kEncodingVexOpMod:
2939       rbReg = 0;
2940       goto EmitVexEvexR;
2941 
2942     case InstDB::kEncodingVexKmov:
2943       if (isign3 == ENC_OPS2(Reg, Reg)) {
2944         opReg = o0.id();
2945         rbReg = o1.id();
2946 
2947         // Form 'k, reg'.
2948         if (Reg::isGp(o1)) {
2949           opcode = x86AltOpcodeOf(instInfo);
2950           goto EmitVexEvexR;
2951         }
2952 
2953         // Form 'reg, k'.
2954         if (Reg::isGp(o0)) {
2955           opcode = x86AltOpcodeOf(instInfo) + 1;
2956           goto EmitVexEvexR;
2957         }
2958 
2959         // Form 'k, k'.
2960         if (!(options & Inst::kOptionModMR))
2961           goto EmitVexEvexR;
2962 
2963         opcode.add(1);
2964         std::swap(opReg, rbReg);
2965         goto EmitVexEvexR;
2966       }
2967 
2968       if (isign3 == ENC_OPS2(Reg, Mem)) {
2969         opReg = o0.id();
2970         rmRel = &o1;
2971 
2972         goto EmitVexEvexM;
2973       }
2974 
2975       if (isign3 == ENC_OPS2(Mem, Reg)) {
2976         opcode.add(1);
2977         opReg = o1.id();
2978         rmRel = &o0;
2979         goto EmitVexEvexM;
2980       }
2981       break;
2982 
2983     case InstDB::kEncodingVexR_Wx:
2984       if (isign3 == ENC_OPS1(Reg)) {
2985         rbReg = o0.id();
2986         opcode.addWIf(o0.as<Reg>().isGpq());
2987         goto EmitVexEvexR;
2988       }
2989       break;
2990 
2991     case InstDB::kEncodingVexM:
2992       if (isign3 == ENC_OPS1(Mem)) {
2993         rmRel = &o0;
2994         goto EmitVexEvexM;
2995       }
2996       break;
2997 
2998     case InstDB::kEncodingVexM_VM:
2999       if (isign3 == ENC_OPS1(Mem)) {
3000         rmRel = &o0;
3001         goto EmitVexEvexM;
3002       }
3003       break;
3004 
3005     case InstDB::kEncodingVexMr_Lx:
3006       opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3007 
3008       if (isign3 == ENC_OPS2(Reg, Reg)) {
3009         opReg = o1.id();
3010         rbReg = o0.id();
3011         goto EmitVexEvexR;
3012       }
3013 
3014       if (isign3 == ENC_OPS2(Mem, Reg)) {
3015         opReg = o1.id();
3016         rmRel = &o0;
3017         goto EmitVexEvexM;
3018       }
3019       break;
3020 
3021     case InstDB::kEncodingVexMr_VM:
3022       if (isign3 == ENC_OPS2(Mem, Reg)) {
3023         opcode |= Support::max(x86OpcodeLByVMem(o0), x86OpcodeLBySize(o1.size()));
3024 
3025         opReg = o1.id();
3026         rmRel = &o0;
3027         goto EmitVexEvexM;
3028       }
3029       break;
3030 
3031     case InstDB::kEncodingVexMri_Vpextrw:
3032       // Use 'vpextrw reg, xmm1, i8' when possible.
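      // The legacy 'C5 /r ib' form can only extract into a GP register, whereas
      // the '0F3A 15 /r ib' form (handled by CaseVexMri below) can also store
      // the extracted word to memory.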
3033       if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
3034         opcode = Opcode::k660F00 | 0xC5;
3035 
3036         opReg = o0.id();
3037         rbReg = o1.id();
3038 
3039         immValue = o2.as<Imm>().value();
3040         immSize = 1;
3041         goto EmitVexEvexR;
3042       }
3043 
3044       goto CaseVexMri;
3045 
3046     case InstDB::kEncodingVexMri_Lx:
3047       opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3048       ASMJIT_FALLTHROUGH;
3049 
3050     case InstDB::kEncodingVexMri:
3051 CaseVexMri:
3052       immValue = o2.as<Imm>().value();
3053       immSize = 1;
3054 
3055       if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
3056         opReg = o1.id();
3057         rbReg = o0.id();
3058         goto EmitVexEvexR;
3059       }
3060 
3061       if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
3062         opReg = o1.id();
3063         rmRel = &o0;
3064         goto EmitVexEvexM;
3065       }
3066       break;
3067 
3068     case InstDB::kEncodingVexRm_ZDI:
3069       if (ASMJIT_UNLIKELY(!o2.isNone() && !x86IsImplicitMem(o2, Gp::kIdDi)))
3070         goto InvalidInstruction;
3071 
3072       isign3 &= 0x3F;
3073       goto CaseVexRm;
3074 
3075     case InstDB::kEncodingVexRm_Wx:
3076       opcode.addWIf(Reg::isGpq(o0) | Reg::isGpq(o1));
3077       goto CaseVexRm;
3078 
3079     case InstDB::kEncodingVexRm_Lx_Narrow:
3080       if (o1.size())
3081         opcode |= x86OpcodeLBySize(o1.size());
3082       else if (o0.size() == 32)
3083         opcode |= Opcode::kLL_2;
3084       goto CaseVexRm;
3085 
3086     case InstDB::kEncodingVexRm_Lx_Bcst:
3087       if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o1.as<Reg>())) {
3088         opcode = x86AltOpcodeOf(instInfo) | x86OpcodeLBySize(o0.size() | o1.size());
3089         opReg = o0.id();
3090         rbReg = o1.id();
3091         goto EmitVexEvexR;
3092       }
3093       ASMJIT_FALLTHROUGH;
3094 
3095     case InstDB::kEncodingVexRm_Lx:
3096       opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3097       ASMJIT_FALLTHROUGH;
3098 
3099     case InstDB::kEncodingVexRm:
3100 CaseVexRm:
3101       if (isign3 == ENC_OPS2(Reg, Reg)) {
3102         opReg = o0.id();
3103         rbReg = o1.id();
3104         goto EmitVexEvexR;
3105       }
3106 
3107       if (isign3 == ENC_OPS2(Reg, Mem)) {
3108         opReg = o0.id();
3109         rmRel = &o1;
3110         goto EmitVexEvexM;
3111       }
3112       break;
3113 
3114     case InstDB::kEncodingVexRm_VM:
3115       if (isign3 == ENC_OPS2(Reg, Mem)) {
3116         opcode |= Support::max(x86OpcodeLByVMem(o1), x86OpcodeLBySize(o0.size()));
3117         opReg = o0.id();
3118         rmRel = &o1;
3119         goto EmitVexEvexM;
3120       }
3121       break;
3122 
3123     case InstDB::kEncodingVexRm_T1_4X: {
3124       const Operand_& o3 = opExt[EmitterUtils::kOp3];
3125       const Operand_& o4 = opExt[EmitterUtils::kOp4];
3126       const Operand_& o5 = opExt[EmitterUtils::kOp5];
3127 
3128       if (Reg::isVec(o0) && Reg::isVec(o1) && Reg::isVec(o2) && Reg::isVec(o3) && Reg::isVec(o4) && o5.isMem()) {
        // Registers [o1, o2, o3, o4] must be consecutive and the first one
        // must be aligned (its index must be a multiple of 4).
3130         uint32_t i1 = o1.id();
3131         uint32_t i2 = o2.id();
3132         uint32_t i3 = o3.id();
3133         uint32_t i4 = o4.id();
3134 
3135         if (ASMJIT_UNLIKELY((i1 & 0x3) != 0 || i2 != i1 + 1 || i3 != i1 + 2 || i4 != i1 + 3))
3136           goto NotConsecutiveRegs;
3137 
3138         opReg = x86PackRegAndVvvvv(o0.id(), i1);
3139         rmRel = &o5;
3140         goto EmitVexEvexM;
3141       }
3142       break;
3143     }
3144 
3145     case InstDB::kEncodingVexRmi_Wx:
3146       opcode.addWIf(Reg::isGpq(o0) | Reg::isGpq(o1));
3147       goto CaseVexRmi;
3148 
3149     case InstDB::kEncodingVexRmi_Lx:
3150       opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3151       ASMJIT_FALLTHROUGH;
3152 
3153     case InstDB::kEncodingVexRmi:
3154 CaseVexRmi:
3155       immValue = o2.as<Imm>().value();
3156       immSize = 1;
3157 
3158       if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
3159         opReg = o0.id();
3160         rbReg = o1.id();
3161         goto EmitVexEvexR;
3162       }
3163 
3164       if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
3165         opReg = o0.id();
3166         rmRel = &o1;
3167         goto EmitVexEvexM;
3168       }
3169       break;
3170 
3171     case InstDB::kEncodingVexRvm:
3172 CaseVexRvm:
3173       if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3174 CaseVexRvm_R:
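        // x86PackRegAndVvvvv() packs both the ModRM.reg selector (o0) and the
        // register that goes into the VEX/EVEX 'vvvv' field (o1) into a single
        // `opReg` value, so the VEX/EVEX emitters can later split it into both
        // fields.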
3175         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3176         rbReg = o2.id();
3177         goto EmitVexEvexR;
3178       }
3179 
3180       if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
3181         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3182         rmRel = &o2;
3183         goto EmitVexEvexM;
3184       }
3185       break;
3186 
3187     case InstDB::kEncodingVexRvm_ZDX_Wx: {
3188       const Operand_& o3 = opExt[EmitterUtils::kOp3];
3189       if (ASMJIT_UNLIKELY(!o3.isNone() && !Reg::isGp(o3, Gp::kIdDx)))
3190         goto InvalidInstruction;
3191       ASMJIT_FALLTHROUGH;
3192     }
3193 
3194     case InstDB::kEncodingVexRvm_Wx: {
3195       opcode.addWIf(Reg::isGpq(o0) | (o2.size() == 8));
3196       goto CaseVexRvm;
3197     }
3198 
3199     case InstDB::kEncodingVexRvm_Lx_KEvex: {
3200       opcode.forceEvexIf(Reg::isKReg(o0));
3201       ASMJIT_FALLTHROUGH;
3202     }
3203 
3204     case InstDB::kEncodingVexRvm_Lx: {
3205       opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3206       goto CaseVexRvm;
3207     }
3208 
3209     case InstDB::kEncodingVexRvm_Lx_2xK: {
3210       if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3211         // Two registers are encoded as a single register.
3212         //   - First K register must be even.
3213         //   - Second K register must be first+1.
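        // This pairing is used by instructions such as VP2INTERSECTD/Q, which
        // write their result into an even/odd pair of K registers (assuming
        // those are the only users of this encoding in the instruction database).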
3214         if ((o0.id() & 1) != 0 || o0.id() + 1 != o1.id())
3215           goto InvalidPhysId;
3216 
3217         const Operand_& o3 = opExt[EmitterUtils::kOp3];
3218 
3219         opcode |= x86OpcodeLBySize(o2.size());
3220         opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3221 
3222         if (o3.isReg()) {
3223           rbReg = o3.id();
3224           goto EmitVexEvexR;
3225         }
3226 
3227         if (o3.isMem()) {
3228           rmRel = &o3;
3229           goto EmitVexEvexM;
3230         }
3231       }
3232       break;
3233     }
3234 
3235     case InstDB::kEncodingVexRvmr_Lx: {
3236       opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3237       ASMJIT_FALLTHROUGH;
3238     }
3239 
3240     case InstDB::kEncodingVexRvmr: {
3241       const Operand_& o3 = opExt[EmitterUtils::kOp3];
3242       const uint32_t isign4 = isign3 + (o3.opType() << 9);
3243 
3244       immValue = o3.id() << 4;
3245       immSize = 1;
3246 
3247       if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
3248         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3249         rbReg = o2.id();
3250         goto EmitVexEvexR;
3251       }
3252 
3253       if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
3254         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3255         rmRel = &o2;
3256         goto EmitVexEvexM;
3257       }
3258       break;
3259     }
3260 
3261     case InstDB::kEncodingVexRvmi_KEvex:
3262       opcode.forceEvexIf(Reg::isKReg(o0));
3263       goto VexRvmi;
3264 
3265     case InstDB::kEncodingVexRvmi_Lx_KEvex:
3266       opcode.forceEvexIf(Reg::isKReg(o0));
3267       ASMJIT_FALLTHROUGH;
3268 
3269     case InstDB::kEncodingVexRvmi_Lx:
3270       opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3271       ASMJIT_FALLTHROUGH;
3272 
3273     case InstDB::kEncodingVexRvmi:
3274 VexRvmi:
3275     {
3276       const Operand_& o3 = opExt[EmitterUtils::kOp3];
3277       const uint32_t isign4 = isign3 + (o3.opType() << 9);
3278 
3279       immValue = o3.as<Imm>().value();
3280       immSize = 1;
3281 
3282       if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) {
3283         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3284         rbReg = o2.id();
3285         goto EmitVexEvexR;
3286       }
3287 
3288       if (isign4 == ENC_OPS4(Reg, Reg, Mem, Imm)) {
3289         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3290         rmRel = &o2;
3291         goto EmitVexEvexM;
3292       }
3293       break;
3294     }
3295 
3296     case InstDB::kEncodingVexRmv_Wx:
3297       opcode.addWIf(Reg::isGpq(o0) | Reg::isGpq(o2));
3298       ASMJIT_FALLTHROUGH;
3299 
3300     case InstDB::kEncodingVexRmv:
3301       if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3302         opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3303         rbReg = o1.id();
3304         goto EmitVexEvexR;
3305       }
3306 
3307       if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
3308         opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3309         rmRel = &o1;
3310         goto EmitVexEvexM;
3311       }
3312       break;
3313 
3314     case InstDB::kEncodingVexRmvRm_VM:
3315       if (isign3 == ENC_OPS2(Reg, Mem)) {
3316         opcode  = x86AltOpcodeOf(instInfo);
3317         opcode |= Support::max(x86OpcodeLByVMem(o1), x86OpcodeLBySize(o0.size()));
3318 
3319         opReg = o0.id();
3320         rmRel = &o1;
3321         goto EmitVexEvexM;
3322       }
3323       ASMJIT_FALLTHROUGH;
3324 
3325     case InstDB::kEncodingVexRmv_VM:
3326       if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
3327         opcode |= Support::max(x86OpcodeLByVMem(o1), x86OpcodeLBySize(o0.size() | o2.size()));
3328 
3329         opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3330         rmRel = &o1;
3331         goto EmitVexEvexM;
3332       }
3333       break;
3334 
3336     case InstDB::kEncodingVexRmvi: {
3337       const Operand_& o3 = opExt[EmitterUtils::kOp3];
3338       const uint32_t isign4 = isign3 + (o3.opType() << 9);
3339 
3340       immValue = o3.as<Imm>().value();
3341       immSize = 1;
3342 
3343       if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) {
3344         opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3345         rbReg = o1.id();
3346         goto EmitVexEvexR;
3347       }
3348 
3349       if (isign4 == ENC_OPS4(Reg, Mem, Reg, Imm)) {
3350         opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3351         rmRel = &o1;
3352         goto EmitVexEvexM;
3353       }
3354       break;
3355     }
3356 
3357     case InstDB::kEncodingVexMovdMovq:
3358       if (isign3 == ENC_OPS2(Reg, Reg)) {
3359         if (Reg::isGp(o0)) {
3360           opcode = x86AltOpcodeOf(instInfo);
3361           opcode.addWBySize(o0.size());
3362           opReg = o1.id();
3363           rbReg = o0.id();
3364           goto EmitVexEvexR;
3365         }
3366 
3367         if (Reg::isGp(o1)) {
3368           opcode.addWBySize(o1.size());
3369           opReg = o0.id();
3370           rbReg = o1.id();
3371           goto EmitVexEvexR;
3372         }
3373 
        // If this is the 'W' version (movq) then also allow the 'vmovq xmm, xmm' form.
3375         if (opcode & Opcode::kEvex_W_1) {
3376           opcode &= ~(Opcode::kPP_VEXMask | Opcode::kMM_Mask | 0xFF);
3377           opcode |=  (Opcode::kF30F00 | 0x7E);
3378 
3379           opReg = o0.id();
3380           rbReg = o1.id();
3381           goto EmitVexEvexR;
3382         }
3383       }
3384 
3385       if (isign3 == ENC_OPS2(Reg, Mem)) {
3386         if (opcode & Opcode::kEvex_W_1) {
3387           opcode &= ~(Opcode::kPP_VEXMask | Opcode::kMM_Mask | 0xFF);
3388           opcode |=  (Opcode::kF30F00 | 0x7E);
3389         }
3390 
3391         opReg = o0.id();
3392         rmRel = &o1;
3393         goto EmitVexEvexM;
3394       }
3395 
3396       // The following instruction uses the secondary opcode.
3397       opcode = x86AltOpcodeOf(instInfo);
3398 
3399       if (isign3 == ENC_OPS2(Mem, Reg)) {
3400         if (opcode & Opcode::kEvex_W_1) {
3401           opcode &= ~(Opcode::kPP_VEXMask | Opcode::kMM_Mask | 0xFF);
3402           opcode |=  (Opcode::k660F00 | 0xD6);
3403         }
3404 
3405         opReg = o1.id();
3406         rmRel = &o0;
3407         goto EmitVexEvexM;
3408       }
3409       break;
3410 
3411     case InstDB::kEncodingVexRmMr_Lx:
3412       opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3413       ASMJIT_FALLTHROUGH;
3414 
3415     case InstDB::kEncodingVexRmMr:
3416       if (isign3 == ENC_OPS2(Reg, Reg)) {
3417         opReg = o0.id();
3418         rbReg = o1.id();
3419         goto EmitVexEvexR;
3420       }
3421 
3422       if (isign3 == ENC_OPS2(Reg, Mem)) {
3423         opReg = o0.id();
3424         rmRel = &o1;
3425         goto EmitVexEvexM;
3426       }
3427 
3428       // The following instruction uses the secondary opcode.
3429       opcode &= Opcode::kLL_Mask;
3430       opcode |= x86AltOpcodeOf(instInfo);
3431 
3432       if (isign3 == ENC_OPS2(Mem, Reg)) {
3433         opReg = o1.id();
3434         rmRel = &o0;
3435         goto EmitVexEvexM;
3436       }
3437       break;
3438 
3439     case InstDB::kEncodingVexRvmRmv:
3440       if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3441         opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3442         rbReg = o1.id();
3443 
3444         if (!(options & Inst::kOptionModMR))
3445           goto EmitVexEvexR;
3446 
3447         opcode.addW();
3448         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3449         rbReg = o2.id();
3450         goto EmitVexEvexR;
3451       }
3452 
3453       if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
3454         opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3455         rmRel = &o1;
3456         goto EmitVexEvexM;
3457       }
3458 
3459       if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
3460         opcode.addW();
3461         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3462         rmRel = &o2;
3463         goto EmitVexEvexM;
3464       }
3465       break;
3466 
3467     case InstDB::kEncodingVexRvmRmi_Lx:
3468       opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3469       ASMJIT_FALLTHROUGH;
3470 
3471     case InstDB::kEncodingVexRvmRmi:
3472       if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3473         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3474         rbReg = o2.id();
3475         goto EmitVexEvexR;
3476       }
3477 
3478       if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
3479         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3480         rmRel = &o2;
3481         goto EmitVexEvexM;
3482       }
3483 
3484       // The following instructions use the secondary opcode.
3485       opcode &= Opcode::kLL_Mask;
3486       opcode |= x86AltOpcodeOf(instInfo);
3487 
3488       immValue = o2.as<Imm>().value();
3489       immSize = 1;
3490 
3491       if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
3492         opReg = o0.id();
3493         rbReg = o1.id();
3494         goto EmitVexEvexR;
3495       }
3496 
3497       if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
3498         opReg = o0.id();
3499         rmRel = &o1;
3500         goto EmitVexEvexM;
3501       }
3502       break;
3503 
3504     case InstDB::kEncodingVexRvmRmvRmi:
3505       if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3506         opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3507         rbReg = o1.id();
3508 
3509         if (!(options & Inst::kOptionModMR))
3510           goto EmitVexEvexR;
3511 
3512         opcode.addW();
3513         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3514         rbReg = o2.id();
3515         goto EmitVexEvexR;
3516       }
3517 
3518       if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
3519         opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3520         rmRel = &o1;
3521         goto EmitVexEvexM;
3522       }
3523 
3524       if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
3525         opcode.addW();
3526         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3527         rmRel = &o2;
3528         goto EmitVexEvexM;
3529       }
3530 
3531       // The following instructions use the secondary opcode.
3532       opcode = x86AltOpcodeOf(instInfo);
3533 
3534       immValue = o2.as<Imm>().value();
3535       immSize = 1;
3536 
3537       if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
3538         opReg = o0.id();
3539         rbReg = o1.id();
3540         goto EmitVexEvexR;
3541       }
3542 
3543       if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
3544         opReg = o0.id();
3545         rmRel = &o1;
3546         goto EmitVexEvexM;
3547       }
3548       break;
3549 
3550     case InstDB::kEncodingVexRvmMr:
3551       if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3552         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3553         rbReg = o2.id();
3554         goto EmitVexEvexR;
3555       }
3556 
3557       if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
3558         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3559         rmRel = &o2;
3560         goto EmitVexEvexM;
3561       }
3562 
3563       // The following instructions use the secondary opcode.
3564       opcode = x86AltOpcodeOf(instInfo);
3565 
3566       if (isign3 == ENC_OPS2(Reg, Reg)) {
3567         opReg = o1.id();
3568         rbReg = o0.id();
3569         goto EmitVexEvexR;
3570       }
3571 
3572       if (isign3 == ENC_OPS2(Mem, Reg)) {
3573         opReg = o1.id();
3574         rmRel = &o0;
3575         goto EmitVexEvexM;
3576       }
3577       break;
3578 
3579     case InstDB::kEncodingVexRvmMvr_Lx:
3580       opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3581       ASMJIT_FALLTHROUGH;
3582 
3583     case InstDB::kEncodingVexRvmMvr:
3584       if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3585         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3586         rbReg = o2.id();
3587         goto EmitVexEvexR;
3588       }
3589 
3590       if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
3591         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3592         rmRel = &o2;
3593         goto EmitVexEvexM;
3594       }
3595 
3596       // The following instruction uses the secondary opcode.
3597       opcode &= Opcode::kLL_Mask;
3598       opcode |= x86AltOpcodeOf(instInfo);
3599 
3600       if (isign3 == ENC_OPS3(Mem, Reg, Reg)) {
3601         opReg = x86PackRegAndVvvvv(o2.id(), o1.id());
3602         rmRel = &o0;
3603         goto EmitVexEvexM;
3604       }
3605       break;
3606 
3607     case InstDB::kEncodingVexRvmVmi_Lx_MEvex:
3608       opcode.forceEvexIf(o1.isMem());
3609       ASMJIT_FALLTHROUGH;
3610 
3611     case InstDB::kEncodingVexRvmVmi_Lx:
3612       opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3613       ASMJIT_FALLTHROUGH;
3614 
3615     case InstDB::kEncodingVexRvmVmi:
3616       if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3617         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3618         rbReg = o2.id();
3619         goto EmitVexEvexR;
3620       }
3621 
3622       if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
3623         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3624         rmRel = &o2;
3625         goto EmitVexEvexM;
3626       }
3627 
3628       // The following instruction uses the secondary opcode.
3629       opcode &= Opcode::kLL_Mask | Opcode::kMM_ForceEvex;
3630       opcode |= x86AltOpcodeOf(instInfo);
3631       opReg = opcode.extractModO();
3632 
3633       immValue = o2.as<Imm>().value();
3634       immSize = 1;
3635 
3636       if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
3637         opReg = x86PackRegAndVvvvv(opReg, o0.id());
3638         rbReg = o1.id();
3639         goto EmitVexEvexR;
3640       }
3641 
3642       if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
3643         opReg = x86PackRegAndVvvvv(opReg, o0.id());
3644         rmRel = &o1;
3645         goto EmitVexEvexM;
3646       }
3647       break;
3648 
3649     case InstDB::kEncodingVexVm_Wx:
3650       opcode.addWIf(Reg::isGpq(o0) | Reg::isGpq(o1));
3651       ASMJIT_FALLTHROUGH;
3652 
3653     case InstDB::kEncodingVexVm:
3654       if (isign3 == ENC_OPS2(Reg, Reg)) {
3655         opReg = x86PackRegAndVvvvv(opReg, o0.id());
3656         rbReg = o1.id();
3657         goto EmitVexEvexR;
3658       }
3659 
3660       if (isign3 == ENC_OPS2(Reg, Mem)) {
3661         opReg = x86PackRegAndVvvvv(opReg, o0.id());
3662         rmRel = &o1;
3663         goto EmitVexEvexM;
3664       }
3665       break;
3666 
3667     case InstDB::kEncodingVexVmi_Lx_MEvex:
3668       if (isign3 == ENC_OPS3(Reg, Mem, Imm))
3669         opcode.forceEvex();
3670       ASMJIT_FALLTHROUGH;
3671 
3672     case InstDB::kEncodingVexVmi_Lx:
3673       opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3674       ASMJIT_FALLTHROUGH;
3675 
3676     case InstDB::kEncodingVexVmi:
3677       immValue = o2.as<Imm>().value();
3678       immSize = 1;
3679 
3680 CaseVexVmi_AfterImm:
3681       if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
3682         opReg = x86PackRegAndVvvvv(opReg, o0.id());
3683         rbReg = o1.id();
3684         goto EmitVexEvexR;
3685       }
3686 
3687       if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
3688         opReg = x86PackRegAndVvvvv(opReg, o0.id());
3689         rmRel = &o1;
3690         goto EmitVexEvexM;
3691       }
3692       break;
3693 
3694     case InstDB::kEncodingVexVmi4_Wx:
3695       opcode.addWIf(Reg::isGpq(o0) || o1.size() == 8);
3696       immValue = o2.as<Imm>().value();
3697       immSize = 4;
3698       goto CaseVexVmi_AfterImm;
3699 
3700     case InstDB::kEncodingVexRvrmRvmr_Lx:
3701       opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3702       ASMJIT_FALLTHROUGH;
3703 
3704     case InstDB::kEncodingVexRvrmRvmr: {
3705       const Operand_& o3 = opExt[EmitterUtils::kOp3];
3706       const uint32_t isign4 = isign3 + (o3.opType() << 9);
3707 
3708       if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
3709         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3710         rbReg = o2.id();
3711 
3712         immValue = o3.id() << 4;
3713         immSize = 1;
3714         goto EmitVexEvexR;
3715       }
3716 
3717       if (isign4 == ENC_OPS4(Reg, Reg, Reg, Mem)) {
3718         opcode.addW();
3719         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3720         rmRel = &o3;
3721 
3722         immValue = o2.id() << 4;
3723         immSize = 1;
3724         goto EmitVexEvexM;
3725       }
3726 
3727       if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
3728         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3729         rmRel = &o2;
3730 
3731         immValue = o3.id() << 4;
3732         immSize = 1;
3733         goto EmitVexEvexM;
3734       }
3735       break;
3736     }
3737 
3738     case InstDB::kEncodingVexRvrmiRvmri_Lx: {
3739       const Operand_& o3 = opExt[EmitterUtils::kOp3];
3740       const Operand_& o4 = opExt[EmitterUtils::kOp4];
3741 
3742       if (ASMJIT_UNLIKELY(!o4.isImm()))
3743         goto InvalidInstruction;
3744 
3745       const uint32_t isign4 = isign3 + (o3.opType() << 9);
3746       opcode |= x86OpcodeLBySize(o0.size() | o1.size() | o2.size() | o3.size());
3747 
3748       immValue = o4.as<Imm>().valueAs<uint8_t>() & 0x0F;
3749       immSize = 1;
3750 
3751       if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
3752         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3753         rbReg = o2.id();
3754 
3755         immValue |= o3.id() << 4;
3756         goto EmitVexEvexR;
3757       }
3758 
3759       if (isign4 == ENC_OPS4(Reg, Reg, Reg, Mem)) {
3760         opcode.addW();
3761         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3762         rmRel = &o3;
3763 
3764         immValue |= o2.id() << 4;
3765         goto EmitVexEvexM;
3766       }
3767 
3768       if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
3769         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3770         rmRel = &o2;
3771 
3772         immValue |= o3.id() << 4;
3773         goto EmitVexEvexM;
3774       }
3775       break;
3776     }
3777 
3778     case InstDB::kEncodingVexMovssMovsd:
3779       if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3780         goto CaseVexRvm_R;
3781       }
3782 
3783       if (isign3 == ENC_OPS2(Reg, Mem)) {
3784         opReg = o0.id();
3785         rmRel = &o1;
3786         goto EmitVexEvexM;
3787       }
3788 
3789       if (isign3 == ENC_OPS2(Mem, Reg)) {
3790         opcode = x86AltOpcodeOf(instInfo);
3791         opReg = o1.id();
3792         rmRel = &o0;
3793         goto EmitVexEvexM;
3794       }
3795       break;
3796 
3797     // ------------------------------------------------------------------------
3798     // [FMA4]
3799     // ------------------------------------------------------------------------
3800 
3801     case InstDB::kEncodingFma4_Lx:
3802       // It's fine to just check the first operand; the second is only checked for sanity.
3803       opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3804       ASMJIT_FALLTHROUGH;
3805 
3806     case InstDB::kEncodingFma4: {
3807       const Operand_& o3 = opExt[EmitterUtils::kOp3];
3808       const uint32_t isign4 = isign3 + (o3.opType() << 9);
3809 
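      // FMA4 encodes the register operand that doesn't go into ModRM or VEX.vvvv
      // in the upper four bits of a trailing immediate byte (the 'is4' byte),
      // which is why a register id is shifted left by 4 into `immValue` below.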
3810       if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
3811         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3812 
3813         if (!(options & Inst::kOptionModMR)) {
3814           // MOD/RM - Encoding preferred by LLVM.
3815           opcode.addW();
3816           rbReg = o3.id();
3817 
3818           immValue = o2.id() << 4;
3819           immSize = 1;
3820           goto EmitVexEvexR;
3821         }
3822         else {
3823           // MOD/MR - Alternative encoding.
3824           rbReg = o2.id();
3825 
3826           immValue = o3.id() << 4;
3827           immSize = 1;
3828           goto EmitVexEvexR;
3829         }
3830       }
3831 
3832       if (isign4 == ENC_OPS4(Reg, Reg, Reg, Mem)) {
3833         opcode.addW();
3834         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3835         rmRel = &o3;
3836 
3837         immValue = o2.id() << 4;
3838         immSize = 1;
3839         goto EmitVexEvexM;
3840       }
3841 
3842       if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
3843         opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3844         rmRel = &o2;
3845 
3846         immValue = o3.id() << 4;
3847         immSize = 1;
3848         goto EmitVexEvexM;
3849       }
3850       break;
3851     }
3852 
3853     // ------------------------------------------------------------------------
3854     // [AMX]
3855     // ------------------------------------------------------------------------
3856 
3857     case InstDB::kEncodingAmxCfg:
3858       if (isign3 == ENC_OPS1(Mem)) {
3859         rmRel = &o0;
3860         goto EmitVexEvexM;
3861       }
3862       break;
3863 
3864     case InstDB::kEncodingAmxR:
3865       if (isign3 == ENC_OPS1(Reg)) {
3866         opReg = o0.id();
3867         rbReg = 0;
3868         goto EmitVexEvexR;
3869       }
3870       break;
3871 
3872     case InstDB::kEncodingAmxRm:
3873       if (isign3 == ENC_OPS2(Reg, Mem)) {
3874         opReg = o0.id();
3875         rmRel = &o1;
3876         goto EmitVexEvexM;
3877       }
3878       break;
3879 
3880     case InstDB::kEncodingAmxMr:
3881       if (isign3 == ENC_OPS2(Mem, Reg)) {
3882         opReg = o1.id();
3883         rmRel = &o0;
3884         goto EmitVexEvexM;
3885       }
3886       break;
3887 
3888     case InstDB::kEncodingAmxRmv:
3889       if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3890         opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3891         rbReg = o1.id();
3892         goto EmitVexEvexR;
3893       }
3894       break;
3895   }
3896 
3897   goto InvalidInstruction;
3898 
3899   // --------------------------------------------------------------------------
3900   // [Emit - X86]
3901   // --------------------------------------------------------------------------
3902 
3903 EmitX86OpMovAbs:
3904   immSize = FastUInt8(registerSize());
3905   writer.emitSegmentOverride(rmRel->as<Mem>().segmentId());
3906 
3907 EmitX86Op:
3908   // Emit mandatory instruction prefix.
3909   writer.emitPP(opcode.v);
3910 
3911   // Emit REX prefix (64-bit only).
3912   {
3913     uint32_t rex = opcode.extractRex(options);
3914     if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
3915       goto InvalidRexPrefix;
3916     rex &= ~kX86ByteInvalidRex & 0xFF;
3917     writer.emit8If(rex | kX86ByteRex, rex != 0);
3918   }
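  // (Reminder: a REX prefix has the form 0100WRXB, so the final byte is
  // `kX86ByteRex` (0x40) OR-ed with the WRXB bits gathered in `rex` above.)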
3919 
3920   // Emit instruction opcodes.
3921   writer.emitMMAndOpcode(opcode.v);
3922   writer.emitImmediate(uint64_t(immValue), immSize);
3923   goto EmitDone;
3924 
3925   // --------------------------------------------------------------------------
3926   // [Emit - X86 - Opcode + Reg]
3927   // --------------------------------------------------------------------------
3928 
3929 EmitX86OpReg:
3930   // Emit mandatory instruction prefix.
3931   writer.emitPP(opcode.v);
3932 
3933   // Emit REX prefix (64-bit only).
3934   {
3935     uint32_t rex = opcode.extractRex(options) | (opReg >> 3); // Rex.B (0x01).
3936     if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
3937       goto InvalidRexPrefix;
3938     rex &= ~kX86ByteInvalidRex & 0xFF;
3939     writer.emit8If(rex | kX86ByteRex, rex != 0);
3940 
3941     opReg &= 0x7;
3942   }
3943 
3944   // Emit instruction opcodes.
3945   opcode += opReg;
3946   writer.emitMMAndOpcode(opcode.v);
3947   writer.emitImmediate(uint64_t(immValue), immSize);
3948   goto EmitDone;
3949 
3950   // --------------------------------------------------------------------------
3951   // [Emit - X86 - Opcode with implicit <mem> operand]
3952   // --------------------------------------------------------------------------
3953 
3954 EmitX86OpImplicitMem:
3955   rmInfo = x86MemInfo[rmRel->as<Mem>().baseAndIndexTypes()];
3956   if (ASMJIT_UNLIKELY(rmRel->as<Mem>().hasOffset() || (rmInfo & kX86MemInfo_Index)))
3957     goto InvalidInstruction;
3958 
3959   // Emit mandatory instruction prefix.
3960   writer.emitPP(opcode.v);
3961 
3962   // Emit REX prefix (64-bit only).
3963   {
3964     uint32_t rex = opcode.extractRex(options);
3965     if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
3966       goto InvalidRexPrefix;
3967     rex &= ~kX86ByteInvalidRex & 0xFF;
3968     writer.emit8If(rex | kX86ByteRex, rex != 0);
3969   }
3970 
3971   // Emit override prefixes.
3972   writer.emitSegmentOverride(rmRel->as<Mem>().segmentId());
3973   writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0);
3974 
3975   // Emit instruction opcodes.
3976   writer.emitMMAndOpcode(opcode.v);
3977 
3978   // Emit immediate value.
3979   writer.emitImmediate(uint64_t(immValue), immSize);
3980   goto EmitDone;
3981 
3982   // --------------------------------------------------------------------------
3983   // [Emit - X86 - Opcode /r - register]
3984   // --------------------------------------------------------------------------
3985 
3986 EmitX86R:
3987   // Mandatory instruction prefix.
3988   writer.emitPP(opcode.v);
3989 
3990   // Emit REX prefix (64-bit only).
3991   {
3992     uint32_t rex = opcode.extractRex(options) |
3993                    ((opReg & 0x08) >> 1) | // REX.R (0x04).
3994                    ((rbReg & 0x08) >> 3) ; // REX.B (0x01).
3995 
3996     if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
3997       goto InvalidRexPrefix;
3998     rex &= ~kX86ByteInvalidRex & 0xFF;
3999     writer.emit8If(rex | kX86ByteRex, rex != 0);
4000 
4001     opReg &= 0x07;
4002     rbReg &= 0x07;
4003   }
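  // Worked example (a sketch in terms of this path's inputs): with a 'W' opcode,
  // opReg == 0 (rax) and rbReg == 8 (r8), the REX byte emitted above is 0x49
  // (0x40|W|B) and the ModRM emitted below is 0xC0 == x86EncodeMod(3, 0, 0).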
4004 
4005   // Emit instruction opcodes.
4006   writer.emitMMAndOpcode(opcode.v);
4007 
4008   // Emit ModR/M.
4009   writer.emit8(x86EncodeMod(3, opReg, rbReg));
4010 
4011   // Emit immediate value.
4012   writer.emitImmediate(uint64_t(immValue), immSize);
4013   goto EmitDone;
4014 
4015   // --------------------------------------------------------------------------
4016   // [Emit - X86 - Opcode /r - memory base]
4017   // --------------------------------------------------------------------------
4018 
4019 EmitX86RFromM:
4020   rmInfo = x86MemInfo[rmRel->as<Mem>().baseAndIndexTypes()];
4021   if (ASMJIT_UNLIKELY(rmRel->as<Mem>().hasOffset() || (rmInfo & kX86MemInfo_Index)))
4022     goto InvalidInstruction;
4023 
4024   // Emit mandatory instruction prefix.
4025   writer.emitPP(opcode.v);
4026 
4027   // Emit REX prefix (64-bit only).
4028   {
4029     uint32_t rex = opcode.extractRex(options) |
4030                    ((opReg & 0x08) >> 1) | // REX.R (0x04).
4031                    ((rbReg       ) >> 3) ; // REX.B (0x01).
4032 
4033     if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
4034       goto InvalidRexPrefix;
4035     rex &= ~kX86ByteInvalidRex & 0xFF;
4036     writer.emit8If(rex | kX86ByteRex, rex != 0);
4037 
4038     opReg &= 0x07;
4039     rbReg &= 0x07;
4040   }
4041 
4042   // Emit override prefixes.
4043   writer.emitSegmentOverride(rmRel->as<Mem>().segmentId());
4044   writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0);
4045 
4046   // Emit instruction opcodes.
4047   writer.emitMMAndOpcode(opcode.v);
4048 
4049   // Emit ModR/M.
4050   writer.emit8(x86EncodeMod(3, opReg, rbReg));
4051 
4052   // Emit immediate value.
4053   writer.emitImmediate(uint64_t(immValue), immSize);
4054   goto EmitDone;
4055 
4056   // --------------------------------------------------------------------------
4057   // [Emit - X86 - Opcode /r - memory operand]
4058   // --------------------------------------------------------------------------
4059 
4060 EmitX86M:
4061   // `rmRel` operand must be memory.
4062   ASMJIT_ASSERT(rmRel != nullptr);
4063   ASMJIT_ASSERT(rmRel->opType() == Operand::kOpMem);
4064   ASMJIT_ASSERT((opcode & Opcode::kCDSHL_Mask) == 0);
4065 
4066   // Emit override prefixes.
4067   rmInfo = x86MemInfo[rmRel->as<Mem>().baseAndIndexTypes()];
4068   writer.emitSegmentOverride(rmRel->as<Mem>().segmentId());
4069 
4070   memOpAOMark = writer.cursor();
4071   writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0);
4072 
4073   // Emit mandatory instruction prefix.
4074   writer.emitPP(opcode.v);
4075 
4076   // Emit REX prefix (64-bit only).
4077   rbReg = rmRel->as<Mem>().baseId();
4078   rxReg = rmRel->as<Mem>().indexId();
4079   {
4080     uint32_t rex;
4081 
4082     rex  = (rbReg >> 3) & 0x01; // REX.B (0x01).
4083     rex |= (rxReg >> 2) & 0x02; // REX.X (0x02).
4084     rex |= (opReg >> 1) & 0x04; // REX.R (0x04).
4085 
4086     rex &= rmInfo;
4087     rex |= opcode.extractRex(options);
4088 
4089     if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
4090       goto InvalidRexPrefix;
4091     rex &= ~kX86ByteInvalidRex & 0xFF;
4092     writer.emit8If(rex | kX86ByteRex, rex != 0);
4093 
4094     opReg &= 0x07;
4095   }
4096 
4097   // Emit instruction opcodes.
4098   writer.emitMMAndOpcode(opcode.v);
4099 
4100   // ... Fall through ...
4101 
4102   // --------------------------------------------------------------------------
4103   // [Emit - MOD/SIB]
4104   // --------------------------------------------------------------------------
4105 
4106 EmitModSib:
4107   if (!(rmInfo & (kX86MemInfo_Index | kX86MemInfo_67H_X86))) {
4108     // ==========|> [BASE + DISP8|DISP32].
4109     if (rmInfo & kX86MemInfo_BaseGp) {
4110       rbReg &= 0x7;
4111       relOffset = rmRel->as<Mem>().offsetLo32();
4112 
4113       uint32_t mod = x86EncodeMod(0, opReg, rbReg);
4114       bool forceSIB = commonInfo->isTsibOp();
4115 
4116       if (rbReg == Gp::kIdSp || forceSIB) {
4117         // TSIB or [XSP|R12].
4118         mod = (mod & 0xF8u) | 0x04u;
4119         if (rbReg != Gp::kIdBp && relOffset == 0) {
4120           writer.emit8(mod);
4121           writer.emit8(x86EncodeSib(0, 4, rbReg));
4122         }
4123         // TSIB or [XSP|R12 + DISP8|DISP32].
4124         else {
4125           uint32_t cdShift = (opcode & Opcode::kCDSHL_Mask) >> Opcode::kCDSHL_Shift;
4126           int32_t cdOffset = relOffset >> cdShift;
4127 
4128           if (Support::isInt8(cdOffset) && relOffset == int32_t(uint32_t(cdOffset) << cdShift)) {
4129             writer.emit8(mod + 0x40); // <- MOD(1, opReg, rbReg).
4130             writer.emit8(x86EncodeSib(0, 4, rbReg));
4131             writer.emit8(cdOffset & 0xFF);
4132           }
4133           else {
4134             writer.emit8(mod + 0x80); // <- MOD(2, opReg, rbReg).
4135             writer.emit8(x86EncodeSib(0, 4, rbReg));
4136             writer.emit32uLE(uint32_t(relOffset));
4137           }
4138         }
4139       }
4140       else if (rbReg != Gp::kIdBp && relOffset == 0) {
4141         // [BASE].
4142         writer.emit8(mod);
4143       }
4144       else {
4145         // [BASE + DISP8|DISP32].
4146         uint32_t cdShift = (opcode & Opcode::kCDSHL_Mask) >> Opcode::kCDSHL_Shift;
4147         int32_t cdOffset = relOffset >> cdShift;
4148 
4149         if (Support::isInt8(cdOffset) && relOffset == int32_t(uint32_t(cdOffset) << cdShift)) {
4150           writer.emit8(mod + 0x40);
4151           writer.emit8(cdOffset & 0xFF);
4152         }
4153         else {
4154           writer.emit8(mod + 0x80);
4155           writer.emit32uLE(uint32_t(relOffset));
4156         }
4157       }
4158     }
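    // Illustrative encodings of the special cases above (with opReg == 0): [rsp]
    // needs a SIB byte because rm == 100 selects SIB, giving ModRM 0x04 + SIB 0x24,
    // while a bare [rbp] cannot use mod == 00 (rm == 101 with mod == 00 means
    // disp32 / RIP-relative instead) and is emitted as mod == 01 with disp8 == 0.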
4159     // ==========|> [ABSOLUTE | DISP32].
4160     else if (!(rmInfo & (kX86MemInfo_BaseLabel | kX86MemInfo_BaseRip))) {
4161       uint32_t addrType = rmRel->as<Mem>().addrType();
4162       relOffset = rmRel->as<Mem>().offsetLo32();
4163 
4164       if (is32Bit()) {
4165         // Explicit relative addressing doesn't work in 32-bit mode.
4166         if (ASMJIT_UNLIKELY(addrType == Mem::kAddrTypeRel))
4167           goto InvalidAddress;
4168 
4169         writer.emit8(x86EncodeMod(0, opReg, 5));
4170         writer.emit32uLE(uint32_t(relOffset));
4171       }
4172       else {
4173         bool isOffsetI32 = rmRel->as<Mem>().offsetHi32() == (relOffset >> 31);
4174         bool isOffsetU32 = rmRel->as<Mem>().offsetHi32() == 0;
4175         uint64_t baseAddress = code()->baseAddress();
4176 
4177         // If relative addressing was not explicitly requested we try to guess: check
4178         // some properties of the memory operand and base the decision on the segment
4179         // prefix and the address type.
4180         if (addrType == Mem::kAddrTypeDefault) {
4181           if (baseAddress == Globals::kNoBaseAddress) {
4182             // Prefer absolute addressing mode if the offset is 32-bit.
4183             addrType = isOffsetI32 || isOffsetU32 ? Mem::kAddrTypeAbs
4184                                                   : Mem::kAddrTypeRel;
4185           }
4186           else {
4187             // Prefer absolute addressing mode if FS|GS segment override is present.
4188             bool hasFsGs = rmRel->as<Mem>().segmentId() >= SReg::kIdFs;
4189             // Prefer absolute addressing mode if this is LEA with 32-bit immediate.
4190             bool isLea32 = (instId == Inst::kIdLea) && (isOffsetI32 || isOffsetU32);
4191 
4192             addrType = hasFsGs || isLea32 ? Mem::kAddrTypeAbs
4193                                           : Mem::kAddrTypeRel;
4194           }
4195         }
4196 
4197         if (addrType == Mem::kAddrTypeRel) {
4198           uint32_t kModRel32Size = 5;
4199           uint64_t virtualOffset = uint64_t(writer.offsetFrom(_bufferData)) + immSize + kModRel32Size;
4200 
4201           if (baseAddress == Globals::kNoBaseAddress || _section->id() != 0) {
4202             // Create a new RelocEntry as we cannot calculate the offset right now.
4203             err = _code->newRelocEntry(&re, RelocEntry::kTypeAbsToRel);
4204             if (ASMJIT_UNLIKELY(err))
4205               goto Failed;
4206 
4207             writer.emit8(x86EncodeMod(0, opReg, 5));
4208 
4209             re->_sourceSectionId = _section->id();
4210             re->_sourceOffset = offset();
4211             re->_format.resetToDataValue(4);
4212             re->_format.setLeadingAndTrailingSize(writer.offsetFrom(_bufferPtr), immSize);
4213             re->_payload = uint64_t(rmRel->as<Mem>().offset());
4214 
4215             writer.emit32uLE(0);
4216             writer.emitImmediate(uint64_t(immValue), immSize);
4217             goto EmitDone;
4218           }
4219           else {
4220             uint64_t rip64 = baseAddress + _section->offset() + virtualOffset;
4221             uint64_t rel64 = uint64_t(rmRel->as<Mem>().offset()) - rip64;
4222 
4223             if (Support::isInt32(int64_t(rel64))) {
4224               writer.emit8(x86EncodeMod(0, opReg, 5));
4225               writer.emit32uLE(uint32_t(rel64 & 0xFFFFFFFFu));
4226               writer.emitImmediate(uint64_t(immValue), immSize);
4227               goto EmitDone;
4228             }
4229             else {
4230               // We must check the original address type as we have modified
4231               // `addrType`. If the original address type was 'rel', the encoding failed.
4232               if (ASMJIT_UNLIKELY(rmRel->as<Mem>().isRel()))
4233                 goto InvalidAddress;
4234             }
4235           }
4236         }
4237 
4238         // Handle unsigned 32-bit address that doesn't work with sign extension.
4239         // Consider the following instructions:
4240         //
4241         //   1. lea rax, [-1]         - Sign extended to 0xFFFFFFFFFFFFFFFF
4242         //   2. lea rax, [0xFFFFFFFF] - Zero extended to 0x00000000FFFFFFFF
4243         //   3. add rax, [-1]         - Sign extended to 0xFFFFFFFFFFFFFFFF
4244         //   4. add rax, [0xFFFFFFFF] - Zero extended to 0x00000000FFFFFFFF
4245         //
4246         // Sign extension is naturally performed by the CPU so we don't have to
4247         // bother, however, zero extension requires an address-size override prefix,
4248         // which we probably don't have at this moment. So to make the address
4249         // valid we need to insert it at `memOpAOMark` if it's not already there.
4250         //
4251         // If this is a 'lea' instruction then it's possible to remove the REX.W part
4252         // from the REX prefix (if present), which would be one byte shorter than
4253         // inserting an address-size override.
4254         //
4255         // NOTE: If we don't do this then these instructions are unencodable.
4256         if (!isOffsetI32) {
4257           // 64-bit absolute address is unencodable.
4258           if (ASMJIT_UNLIKELY(!isOffsetU32))
4259             goto InvalidAddress64Bit;
4260 
4261           // We only patch the existing code if we don't have address-size override.
4262           if (*memOpAOMark != 0x67) {
4263             if (instId == Inst::kIdLea) {
4264               // LEA: Remove REX.W, if present. This is easy as we know that 'lea'
4265               // doesn't use any PP prefix so if REX prefix was emitted it would be
4266               // at `memOpAOMark`.
4267               uint32_t rex = *memOpAOMark;
4268               if (rex & kX86ByteRex) {
4269                 rex &= (~kX86ByteRexW) & 0xFF;
4270                 *memOpAOMark = uint8_t(rex);
4271 
4272                 // We can remove the REX prefix completely if it was not forced.
4273                 if (rex == kX86ByteRex && !(options & Inst::kOptionRex))
4274                   writer.remove8(memOpAOMark);
4275               }
4276             }
4277             else {
4278               // Any other instruction: Insert address-size override prefix.
4279               writer.insert8(memOpAOMark, 0x67);
4280             }
4281           }
4282         }
4283 
4284         // Emit 32-bit absolute address.
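        // (mod == 00, rm == 100 with SIB index == 100 and base == 101 is the
        // canonical "no base, no index, disp32 only" absolute form emitted here.)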
4285         writer.emit8(x86EncodeMod(0, opReg, 4));
4286         writer.emit8(x86EncodeSib(0, 4, 5));
4287         writer.emit32uLE(uint32_t(relOffset));
4288       }
4289     }
4290     // ==========|> [LABEL|RIP + DISP32]
4291     else {
4292       writer.emit8(x86EncodeMod(0, opReg, 5));
4293 
4294       if (is32Bit()) {
4295 EmitModSib_LabelRip_X86:
4296         if (ASMJIT_UNLIKELY(_code->_relocations.willGrow(_code->allocator()) != kErrorOk))
4297           goto OutOfMemory;
4298 
4299         relOffset = rmRel->as<Mem>().offsetLo32();
4300         if (rmInfo & kX86MemInfo_BaseLabel) {
4301           // [LABEL->ABS].
4302           label = _code->labelEntry(rmRel->as<Mem>().baseId());
4303           if (ASMJIT_UNLIKELY(!label))
4304             goto InvalidLabel;
4305 
4306           err = _code->newRelocEntry(&re, RelocEntry::kTypeRelToAbs);
4307           if (ASMJIT_UNLIKELY(err))
4308             goto Failed;
4309 
4310           re->_sourceSectionId = _section->id();
4311           re->_sourceOffset = offset();
4312           re->_format.resetToDataValue(4);
4313           re->_format.setLeadingAndTrailingSize(writer.offsetFrom(_bufferPtr), immSize);
4314           re->_payload = uint64_t(int64_t(relOffset));
4315 
4316           if (label->isBound()) {
4317             // Label bound to the current section.
4318             re->_payload += label->offset();
4319             re->_targetSectionId = label->section()->id();
4320             writer.emit32uLE(0);
4321           }
4322           else {
4323             // Non-bound label or label bound to a different section.
4324             relOffset = -4 - immSize;
4325             relSize = 4;
4326             goto EmitRel;
4327           }
4328         }
4329         else {
4330           // [RIP->ABS].
4331           err = _code->newRelocEntry(&re, RelocEntry::kTypeRelToAbs);
4332           if (ASMJIT_UNLIKELY(err))
4333             goto Failed;
4334 
4335           re->_sourceSectionId = _section->id();
4336           re->_targetSectionId = _section->id();
4337           re->_format.resetToDataValue(4);
4338           re->_format.setLeadingAndTrailingSize(writer.offsetFrom(_bufferPtr), immSize);
4339           re->_sourceOffset = offset();
4340           re->_payload = re->_sourceOffset + re->_format.regionSize() + uint64_t(int64_t(relOffset));
4341 
4342           writer.emit32uLE(0);
4343         }
4344       }
4345       else {
4346         relOffset = rmRel->as<Mem>().offsetLo32();
4347         if (rmInfo & kX86MemInfo_BaseLabel) {
4348           // [RIP].
4349           label = _code->labelEntry(rmRel->as<Mem>().baseId());
4350           if (ASMJIT_UNLIKELY(!label))
4351             goto InvalidLabel;
4352 
4353           relOffset -= (4 + immSize);
4354           if (label->isBoundTo(_section)) {
4355             // Label bound to the current section.
4356             relOffset += int32_t(label->offset() - writer.offsetFrom(_bufferData));
4357             writer.emit32uLE(uint32_t(relOffset));
4358           }
4359           else {
4360             // Non-bound label or label bound to a different section.
4361             relSize = 4;
4362             goto EmitRel;
4363           }
4364         }
4365         else {
4366           // [RIP].
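          // Sketch: mod == 00 with rm == 101 selects [RIP + disp32] here, so e.g.
          // 'lea rax, [rip]' comes out as 48 8D 05 followed by a zero disp32.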
4367           writer.emit32uLE(uint32_t(relOffset));
4368         }
4369       }
4370     }
4371   }
4372   else if (!(rmInfo & kX86MemInfo_67H_X86)) {
4373     // ESP|RSP can't be used as INDEX in pure SIB mode, however, VSIB mode
4374     // allows XMM4|YMM4|ZMM4 (that's why the check is before the label).
4375     if (ASMJIT_UNLIKELY(rxReg == Gp::kIdSp))
4376       goto InvalidAddressIndex;
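    // (index == 100 in a plain SIB byte means "no index", which is why ESP|RSP can
    // never be an index register; VSIB - used by gathers/scatters such as
    // 'vgatherdps' - reuses that slot for XMM4|YMM4|ZMM4.)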
4377 
4378 EmitModVSib:
4379     rxReg &= 0x7;
4380 
4381     // ==========|> [BASE + INDEX + DISP8|DISP32].
4382     if (rmInfo & kX86MemInfo_BaseGp) {
4383       rbReg &= 0x7;
4384       relOffset = rmRel->as<Mem>().offsetLo32();
4385 
4386       uint32_t mod = x86EncodeMod(0, opReg, 4);
4387       uint32_t sib = x86EncodeSib(rmRel->as<Mem>().shift(), rxReg, rbReg);
4388 
4389       if (relOffset == 0 && rbReg != Gp::kIdBp) {
4390         // [BASE + INDEX << SHIFT].
4391         writer.emit8(mod);
4392         writer.emit8(sib);
4393       }
4394       else {
4395         uint32_t cdShift = (opcode & Opcode::kCDSHL_Mask) >> Opcode::kCDSHL_Shift;
4396         int32_t cdOffset = relOffset >> cdShift;
4397 
4398         if (Support::isInt8(cdOffset) && relOffset == int32_t(uint32_t(cdOffset) << cdShift)) {
4399           // [BASE + INDEX << SHIFT + DISP8].
4400           writer.emit8(mod + 0x40); // <- MOD(1, opReg, 4).
4401           writer.emit8(sib);
4402           writer.emit8(uint32_t(cdOffset));
4403         }
4404         else {
4405           // [BASE + INDEX << SHIFT + DISP32].
4406           writer.emit8(mod + 0x80); // <- MOD(2, opReg, 4).
4407           writer.emit8(sib);
4408           writer.emit32uLE(uint32_t(relOffset));
4409         }
4410       }
4411     }
4412     // ==========|> [INDEX + DISP32].
4413     else if (!(rmInfo & (kX86MemInfo_BaseLabel | kX86MemInfo_BaseRip))) {
4414       // [INDEX << SHIFT + DISP32].
4415       writer.emit8(x86EncodeMod(0, opReg, 4));
4416       writer.emit8(x86EncodeSib(rmRel->as<Mem>().shift(), rxReg, 5));
4417 
4418       relOffset = rmRel->as<Mem>().offsetLo32();
4419       writer.emit32uLE(uint32_t(relOffset));
4420     }
4421     // ==========|> [LABEL|RIP + INDEX + DISP32].
4422     else {
4423       if (is32Bit()) {
4424         writer.emit8(x86EncodeMod(0, opReg, 4));
4425         writer.emit8(x86EncodeSib(rmRel->as<Mem>().shift(), rxReg, 5));
4426         goto EmitModSib_LabelRip_X86;
4427       }
4428       else {
4429         // NOTE: This also handles VSIB+RIP, which is not allowed in 64-bit mode.
4430         goto InvalidAddress;
4431       }
4432     }
4433   }
4434   else {
4435     // 16-bit address mode (32-bit mode with 67 override prefix).
4436     relOffset = (int32_t(rmRel->as<Mem>().offsetLo32()) << 16) >> 16;
4437 
4438     // NOTE: 16-bit addresses don't use SIB byte and their encoding differs. We
4439     // use a table-based approach to calculate the proper MOD byte as it's easier.
4440     // Also, not all BASE [+ INDEX] combinations are supported in 16-bit mode, so
4441     // this may fail.
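    // For reference, the legacy 16-bit r/m combinations are [bx+si], [bx+di],
    // [bp+si], [bp+di], [si], [di], [bp] and [bx]; mod == 00 with r/m == 110
    // means a plain DISP16, which is why a bare [bp] has to be encoded with
    // mod == 01 and disp8 == 0.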
4442     const uint32_t kBaseGpIdx = (kX86MemInfo_BaseGp | kX86MemInfo_Index);
4443 
4444     if (rmInfo & kBaseGpIdx) {
4445       // ==========|> [BASE + INDEX + DISP16].
4446       uint32_t mod;
4447 
4448       rbReg &= 0x7;
4449       rxReg &= 0x7;
4450 
4451       if ((rmInfo & kBaseGpIdx) == kBaseGpIdx) {
4452         uint32_t shf = rmRel->as<Mem>().shift();
4453         if (ASMJIT_UNLIKELY(shf != 0))
4454           goto InvalidAddress;
4455         mod = x86Mod16BaseIndexTable[(rbReg << 3) + rxReg];
4456       }
4457       else {
4458         if (rmInfo & kX86MemInfo_Index)
4459           rbReg = rxReg;
4460         mod = x86Mod16BaseTable[rbReg];
4461       }
4462 
4463       if (ASMJIT_UNLIKELY(mod == 0xFF))
4464         goto InvalidAddress;
4465 
4466       mod += opReg << 3;
4467       if (relOffset == 0 && mod != 0x06) {
4468         writer.emit8(mod);
4469       }
4470       else if (Support::isInt8(relOffset)) {
4471         writer.emit8(mod + 0x40);
4472         writer.emit8(uint32_t(relOffset));
4473       }
4474       else {
4475         writer.emit8(mod + 0x80);
4476         writer.emit16uLE(uint32_t(relOffset));
4477       }
4478     }
4479     else {
4480       // Not supported in 16-bit addresses.
4481       if (rmInfo & (kX86MemInfo_BaseRip | kX86MemInfo_BaseLabel))
4482         goto InvalidAddress;
4483 
4484       // ==========|> [DISP16].
4485       writer.emit8(opReg | 0x06);
4486       writer.emit16uLE(uint32_t(relOffset));
4487     }
4488   }
4489 
4490   writer.emitImmediate(uint64_t(immValue), immSize);
4491   goto EmitDone;
4492 
4493   // --------------------------------------------------------------------------
4494   // [Emit - FPU]
4495   // --------------------------------------------------------------------------
4496 
4497 EmitFpuOp:
4498   // Mandatory instruction prefix.
4499   writer.emitPP(opcode.v);
4500 
4501   // FPU instructions consist of two opcodes.
4502   writer.emit8(opcode.v >> Opcode::kFPU_2B_Shift);
4503   writer.emit8(opcode.v);
4504   goto EmitDone;
4505 
4506   // --------------------------------------------------------------------------
4507   // [Emit - VEX|EVEX]
4508   // --------------------------------------------------------------------------
4509 
4510 EmitVexEvexOp:
4511   {
4512     // These don't use immediate.
4513     ASMJIT_ASSERT(immSize == 0);
4514 
4515     // Only 'vzeroall' and 'vzeroupper' instructions use this encoding; they
4516     // don't define 'W' to be '1' so we can just check the 'mmmmm' field. Both
4517     // instructions can be encoded with a VEX2 prefix, so VEX3 is basically only
4518     // used when specified as an instruction option.
4519     ASMJIT_ASSERT((opcode & Opcode::kW) == 0);
4520 
4521     uint32_t x = ((opcode  & Opcode::kMM_Mask    ) >> (Opcode::kMM_Shift     )) |
4522                  ((opcode  & Opcode::kLL_Mask    ) >> (Opcode::kLL_Shift - 10)) |
4523                  ((opcode  & Opcode::kPP_VEXMask ) >> (Opcode::kPP_Shift -  8)) |
4524                  ((options & Inst::kOptionVex3   ) >> (Opcode::kMM_Shift     )) ;
4525     if (x & 0x04u) {
4526       x  = (x & (0x4 ^ 0xFFFF)) << 8;                    // [00000000|00000Lpp|0000m0mm|00000000].
4527       x ^= (kX86ByteVex3) |                              // [........|00000Lpp|0000m0mm|__VEX3__].
4528            (0x07u  << 13) |                              // [........|00000Lpp|1110m0mm|__VEX3__].
4529            (0x0Fu  << 19) |                              // [........|01111Lpp|1110m0mm|__VEX3__].
4530            (opcode << 24) ;                              // [_OPCODE_|01111Lpp|1110m0mm|__VEX3__].
4531 
4532       writer.emit32uLE(x);
4533       goto EmitDone;
4534     }
4535     else {
4536       x = ((x >> 8) ^ x) ^ 0xF9;
4537       writer.emit8(kX86ByteVex2);
4538       writer.emit8(x);
4539       writer.emit8(opcode.v);
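      // Illustrative result: 'vzeroupper' takes this VEX2 path and is emitted
      // as C5 F8 77 - 0xF8 being the inverted R/vvvv bits plus L and pp.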
4540       goto EmitDone;
4541     }
4542   }
4543 
4544   // --------------------------------------------------------------------------
4545   // [Emit - VEX|EVEX - /r (Register)]
4546   // --------------------------------------------------------------------------
4547 
4548 EmitVexEvexR:
4549   {
4550     // Construct `x` - a complete EVEX|VEX prefix.
4551     uint32_t x = ((opReg << 4) & 0xF980u) |              // [........|........|Vvvvv..R|R.......].
4552                  ((rbReg << 2) & 0x0060u) |              // [........|........|........|.BB.....].
4553                  (opcode.extractLLMM(options)) |         // [........|.LL.....|Vvvvv..R|RBBmmmmm].
4554                  (_extraReg.id() << 16);                 // [........|.LL..aaa|Vvvvv..R|RBBmmmmm].
4555     opReg &= 0x7;
4556 
4557     // Handle AVX512 options by a single branch.
4558     const uint32_t kAvx512Options = Inst::kOptionZMask | Inst::kOptionER | Inst::kOptionSAE;
4559     if (options & kAvx512Options) {
4560       uint32_t kBcstMask = 0x1 << 20;
4561       uint32_t kLLMask10 = 0x2 << 21;
4562       uint32_t kLLMask11 = 0x3 << 21;
4563 
4564       // Designed to be easily encodable so the position must be exact.
4565       // The {rz-sae} is encoded as {11}, so it should match the mask.
4566       ASMJIT_ASSERT(Inst::kOptionRZ_SAE == kLLMask11);
4567 
4568       x |= options & Inst::kOptionZMask;                 // [........|zLLb.aaa|Vvvvv..R|RBBmmmmm].
4569 
4570       // Support embedded-rounding {er} and suppress-all-exceptions {sae}.
4571       if (options & (Inst::kOptionER | Inst::kOptionSAE)) {
4572         // Embedded rounding is only encodable if the instruction is either
4573         // scalar or a 512-bit operation, as the {er} rounding predicate
4574         // collides with the LL part of the instruction.
4575         if ((x & kLLMask11) != kLLMask10) {
4576           // Ok, so LL is not 10, thus the instruction must be scalar.
4577           // Scalar instructions don't support broadcast, so if this instruction
4578           // supports broadcast neither {er} nor {sae} would be encodable.
4579           if (ASMJIT_UNLIKELY(commonInfo->hasAvx512B()))
4580             goto InvalidEROrSAE;
4581         }
4582 
4583         if (options & Inst::kOptionER) {
4584           if (ASMJIT_UNLIKELY(!commonInfo->hasAvx512ER()))
4585             goto InvalidEROrSAE;
4586 
4587           x &=~kLLMask11;                                // [........|.00..aaa|Vvvvv..R|RBBmmmmm].
4588           x |= kBcstMask | (options & kLLMask11);        // [........|.LLb.aaa|Vvvvv..R|RBBmmmmm].
4589         }
4590         else {
4591           if (ASMJIT_UNLIKELY(!commonInfo->hasAvx512SAE()))
4592             goto InvalidEROrSAE;
4593 
4594           x |= kBcstMask;                                // [........|.LLb.aaa|Vvvvv..R|RBBmmmmm].
4595         }
4596       }
4597     }
4598 
4599     // If these bits are used then EVEX prefix is required.
4600     constexpr uint32_t kEvexBits = 0x00D78150u;          // [........|xx.x.xxx|x......x|.x.x....].
4601 
4602     // Force the EVEX prefix even in case the instruction has a VEX encoding, because the EVEX encoding is preferred.
4603     // At the moment this is only required for AVX_VNNI instructions, which were added after AVX512_VNNI instructions.
4604     // If such an instruction doesn't specify a prefix explicitly, EVEX (AVX512_VNNI) is used by default.
4605     if (commonInfo->preferEvex()) {
4606       if ((x & kEvexBits) == 0 && (options & (Inst::kOptionVex | Inst::kOptionVex3)) == 0) {
4607         x |= (Opcode::kMM_ForceEvex) >> Opcode::kMM_Shift;
4608       }
4609     }
4610 
4611     // Check if EVEX is required by checking bits in `x` :  [........|xx.x.xxx|x......x|.x.x....].
4612     if (x & kEvexBits) {
4613       uint32_t y = ((x << 4) & 0x00080000u) |            // [........|...bV...|........|........].
4614                    ((x >> 4) & 0x00000010u) ;            // [........|...bV...|........|...R....].
4615       x  = (x & 0x00FF78E3u) | y;                        // [........|zLLbVaaa|0vvvv000|RBBR00mm].
4616       x  = x << 8;                                       // [zLLbVaaa|0vvvv000|RBBR00mm|00000000].
4617       x |= (opcode >> kVSHR_W    ) & 0x00800000u;        // [zLLbVaaa|Wvvvv000|RBBR00mm|00000000].
4618       x |= (opcode >> kVSHR_PP_EW) & 0x00830000u;        // [zLLbVaaa|Wvvvv0pp|RBBR00mm|00000000] (added PP and EVEX.W).
4619                                                          //      _     ____    ____
4620       x ^= 0x087CF000u | kX86ByteEvex;                   // [zLLbVaaa|Wvvvv1pp|RBBR00mm|01100010].
4621 
4622       writer.emit32uLE(x);
4623       writer.emit8(opcode.v);
4624 
4625       rbReg &= 0x7;
4626       writer.emit8(x86EncodeMod(3, opReg, rbReg));
4627       writer.emitImmByteOrDWord(uint64_t(immValue), immSize);
4628       goto EmitDone;
4629     }
4630 
4631     // Not EVEX, prepare `x` for VEX2 or VEX3:          x = [........|00L00000|0vvvv000|R0B0mmmm].
4632     x |= ((opcode >> (kVSHR_W  + 8)) & 0x8000u) |        // [00000000|00L00000|Wvvvv000|R0B0mmmm].
4633          ((opcode >> (kVSHR_PP + 8)) & 0x0300u) |        // [00000000|00L00000|0vvvv0pp|R0B0mmmm].
4634          ((x      >> 11            ) & 0x0400u) ;        // [00000000|00L00000|WvvvvLpp|R0B0mmmm].
4635 
4636     // Check if VEX3 is required / forced:                  [........|........|x.......|..x..x..].
4637     if (x & 0x0008024u) {
4638       uint32_t xorMsk = x86VEXPrefix[x & 0xF] | (opcode << 24);
4639 
4640       // Clear 'FORCE-VEX3' bit and all high bits.
4641       x  = (x & (0x4 ^ 0xFFFF)) << 8;                    // [00000000|WvvvvLpp|R0B0m0mm|00000000].
4642                                                          //            ____    _ _
4643       x ^= xorMsk;                                       // [_OPCODE_|WvvvvLpp|R1Bmmmmm|VEX3|XOP].
4644       writer.emit32uLE(x);
4645 
4646       rbReg &= 0x7;
4647       writer.emit8(x86EncodeMod(3, opReg, rbReg));
4648       writer.emitImmByteOrDWord(uint64_t(immValue), immSize);
4649       goto EmitDone;
4650     }
4651     else {
4652       // 'mmmmm' must be '00001'.
4653       ASMJIT_ASSERT((x & 0x1F) == 0x01);
4654 
4655       x = ((x >> 8) ^ x) ^ 0xF9;
4656       writer.emit8(kX86ByteVex2);
4657       writer.emit8(x);
4658       writer.emit8(opcode.v);
4659 
4660       rbReg &= 0x7;
4661       writer.emit8(x86EncodeMod(3, opReg, rbReg));
4662       writer.emitImmByteOrDWord(uint64_t(immValue), immSize);
4663       goto EmitDone;
4664     }
4665   }
4666 
4667   // --------------------------------------------------------------------------
4668   // [Emit - VEX|EVEX - /r (Memory)]
4669   // --------------------------------------------------------------------------
4670 
4671 EmitVexEvexM:
4672   ASMJIT_ASSERT(rmRel != nullptr);
4673   ASMJIT_ASSERT(rmRel->opType() == Operand::kOpMem);
4674 
4675   rmInfo = x86MemInfo[rmRel->as<Mem>().baseAndIndexTypes()];
4676   writer.emitSegmentOverride(rmRel->as<Mem>().segmentId());
4677 
4678   memOpAOMark = writer.cursor();
4679   writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0);
4680 
4681   rbReg = rmRel->as<Mem>().hasBaseReg()  ? rmRel->as<Mem>().baseId()  : uint32_t(0);
4682   rxReg = rmRel->as<Mem>().hasIndexReg() ? rmRel->as<Mem>().indexId() : uint32_t(0);
4683 
4684   {
4685     uint32_t broadcastBit = uint32_t(rmRel->as<Mem>().hasBroadcast());
4686 
4687     // Construct `x` - a complete EVEX|VEX prefix.
4688     uint32_t x = ((opReg <<  4) & 0x0000F980u) |         // [........|........|Vvvvv..R|R.......].
4689                  ((rxReg <<  3) & 0x00000040u) |         // [........|........|........|.X......].
4690                  ((rxReg << 15) & 0x00080000u) |         // [........|....X...|........|........].
4691                  ((rbReg <<  2) & 0x00000020u) |         // [........|........|........|..B.....].
4692                  opcode.extractLLMM(options)   |         // [........|.LL.X...|Vvvvv..R|RXBmmmmm].
4693                  (_extraReg.id()    << 16)     |         // [........|.LL.Xaaa|Vvvvv..R|RXBmmmmm].
4694                  (broadcastBit      << 20)     ;         // [........|.LLbXaaa|Vvvvv..R|RXBmmmmm].
4695     opReg &= 0x07u;
4696 
4697     // Mark invalid VEX (force EVEX) case:               // [@.......|.LLbXaaa|Vvvvv..R|RXBmmmmm].
4698     x |= (~commonInfo->flags() & InstDB::kFlagVex) << (31 - Support::constCtz(InstDB::kFlagVex));
4699 
4700     // Handle AVX512 options by a single branch.
4701     const uint32_t kAvx512Options = Inst::kOptionZMask   |
4702                                     Inst::kOptionER      |
4703                                     Inst::kOptionSAE     ;
4704     if (options & kAvx512Options) {
4705       // {er} and {sae} are both invalid if memory operand is used.
4706       if (ASMJIT_UNLIKELY(options & (Inst::kOptionER | Inst::kOptionSAE)))
4707         goto InvalidEROrSAE;
4708 
4709       x |= options & (Inst::kOptionZMask);               // [@.......|zLLbXaaa|Vvvvv..R|RXBmmmmm].
4710     }
4711 
4712     // If these bits are used then EVEX prefix is required.
4713     constexpr uint32_t kEvexBits = 0x80DF8110u;          // [@.......|xx.xxxxx|x......x|...x....].
4714 
4715     // Force the EVEX prefix even in case the instruction has a VEX encoding, because the EVEX encoding is preferred.
4716     // At the moment this is only required for AVX_VNNI instructions, which were added after AVX512_VNNI instructions.
4717     // If such an instruction doesn't specify a prefix explicitly, EVEX (AVX512_VNNI) is used by default.
4718     if (commonInfo->preferEvex()) {
4719       if ((x & kEvexBits) == 0 && (options & (Inst::kOptionVex | Inst::kOptionVex3)) == 0) {
4720         x |= (Opcode::kMM_ForceEvex) >> Opcode::kMM_Shift;
4721       }
4722     }
4723 
4724     // Check if EVEX is required by checking bits in `x` :  [@.......|xx.xxxxx|x......x|...x....].
4725     if (x & kEvexBits) {
4726       uint32_t y = ((x << 4) & 0x00080000u) |            // [@.......|....V...|........|........].
4727                    ((x >> 4) & 0x00000010u) ;            // [@.......|....V...|........|...R....].
4728       x  = (x & 0x00FF78E3u) | y;                        // [........|zLLbVaaa|0vvvv000|RXBR00mm].
4729       x  = x << 8;                                       // [zLLbVaaa|0vvvv000|RBBR00mm|00000000].
4730       x |= (opcode >> kVSHR_W    ) & 0x00800000u;        // [zLLbVaaa|Wvvvv000|RBBR00mm|00000000].
4731       x |= (opcode >> kVSHR_PP_EW) & 0x00830000u;        // [zLLbVaaa|Wvvvv0pp|RBBR00mm|00000000] (added PP and EVEX.W).
4732                                                          //      _     ____    ____
4733       x ^= 0x087CF000u | kX86ByteEvex;                   // [zLLbVaaa|Wvvvv1pp|RBBR00mm|01100010].
4734 
4735       writer.emit32uLE(x);
4736       writer.emit8(opcode.v);
4737 
4738       if (x & 0x10000000u) {
4739         // Broadcast - change the compressed displacement scale to either x4 (SHL 2) or x8 (SHL 3)
4740         // depending on the instruction's W. If 'W' is 1 then 'SHL' must be 3, otherwise it must be 2.
4741         opcode &=~uint32_t(Opcode::kCDSHL_Mask);
4742         opcode |= ((x & 0x00800000u) ? 3u : 2u) << Opcode::kCDSHL_Shift;
4743       }
4744       else {
4745         // Add the compressed displacement 'SHF' to the opcode based on 'TTWLL'.
4746         // The index to `x86CDisp8SHL` is composed as `CDTT[4:3] | W[2] | LL[1:0]`.
4747         uint32_t TTWLL = ((opcode >> (Opcode::kCDTT_Shift - 3)) & 0x18) +
4748                          ((opcode >> (Opcode::kW_Shift    - 2)) & 0x04) +
4749                          ((x >> 29) & 0x3);
4750         opcode += x86CDisp8SHL[TTWLL];
4751       }
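      // Compressed displacement sketch: a full 512-bit memory operand uses a
      // scale of 64 (CDSHL == 6), so a displacement of +256 later compresses
      // to the single byte DISP8 == 4 in EmitModSib (256 >> 6 == 4).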
4752     }
4753     else {
4754       // Not EVEX, prepare `x` for VEX2 or VEX3:        x = [........|00L00000|0vvvv000|RXB0mmmm].
4755       x |= ((opcode >> (kVSHR_W  + 8)) & 0x8000u) |      // [00000000|00L00000|Wvvvv000|RXB0mmmm].
4756            ((opcode >> (kVSHR_PP + 8)) & 0x0300u) |      // [00000000|00L00000|Wvvvv0pp|RXB0mmmm].
4757            ((x      >> 11            ) & 0x0400u) ;      // [00000000|00L00000|WvvvvLpp|RXB0mmmm].
4758 
4759       // Clear a possible CDisp specified by EVEX.
4760       opcode &= ~Opcode::kCDSHL_Mask;
4761 
4762       // Check if VEX3 is required / forced:                [........|........|x.......|.xx..x..].
4763       if (x & 0x0008064u) {
4764         uint32_t xorMsk = x86VEXPrefix[x & 0xF] | (opcode << 24);
4765 
4766         // Clear 'FORCE-VEX3' bit and all high bits.
4767         x  = (x & (0x4 ^ 0xFFFF)) << 8;                  // [00000000|WvvvvLpp|RXB0m0mm|00000000].
4768                                                          //            ____    ___
4769         x ^= xorMsk;                                     // [_OPCODE_|WvvvvLpp|RXBmmmmm|VEX3_XOP].
4770         writer.emit32uLE(x);
4771       }
4772       else {
4773         // 'mmmmm' must be '00001'.
4774         ASMJIT_ASSERT((x & 0x1F) == 0x01);
4775 
4776         x = ((x >> 8) ^ x) ^ 0xF9;
4777         writer.emit8(kX86ByteVex2);
4778         writer.emit8(x);
4779         writer.emit8(opcode.v);
4780       }
4781     }
4782   }
4783 
4784   // MOD|SIB address.
4785   if (!commonInfo->hasFlag(InstDB::kFlagVsib))
4786     goto EmitModSib;
4787 
4788   // MOD|VSIB address without INDEX is invalid.
4789   if (rmInfo & kX86MemInfo_Index)
4790     goto EmitModVSib;
4791   goto InvalidInstruction;
4792 
4793   // --------------------------------------------------------------------------
4794   // [Emit - Jmp/Jcc/Call]
4795   // --------------------------------------------------------------------------
4796 
4797 EmitJmpCall:
4798   {
4799     // Emit REX prefix if asked for (64-bit only).
4800     uint32_t rex = opcode.extractRex(options);
4801     if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
4802       goto InvalidRexPrefix;
4803     rex &= ~kX86ByteInvalidRex & 0xFF;
4804     writer.emit8If(rex | kX86ByteRex, rex != 0);
4805 
4806     uint64_t ip = uint64_t(writer.offsetFrom(_bufferData));
4807     uint32_t rel32 = 0;
4808     uint32_t opCode8 = x86AltOpcodeOf(instInfo);
4809 
4810     uint32_t inst8Size  = 1 + 1; //          OPCODE + REL8 .
4811     uint32_t inst32Size = 1 + 4; // [PREFIX] OPCODE + REL32.
4812 
4813     // Jcc instructions with a 32-bit displacement use the 0x0F prefix;
4814     // other instructions don't. No other prefixes are used by X86.
4815     ASMJIT_ASSERT((opCode8 & Opcode::kMM_Mask) == 0);
4816     ASMJIT_ASSERT((opcode  & Opcode::kMM_Mask) == 0 ||
4817                   (opcode  & Opcode::kMM_Mask) == Opcode::kMM_0F);
4818 
4819     // Only one of these should be used at the same time.
4820     inst32Size += uint32_t(opReg != 0);
4821     inst32Size += uint32_t((opcode & Opcode::kMM_Mask) == Opcode::kMM_0F);
4822 
4823     if (rmRel->isLabel()) {
4824       label = _code->labelEntry(rmRel->as<Label>());
4825       if (ASMJIT_UNLIKELY(!label))
4826         goto InvalidLabel;
4827 
4828       if (label->isBoundTo(_section)) {
4829         // Label bound to the current section.
4830         rel32 = uint32_t((label->offset() - ip - inst32Size) & 0xFFFFFFFFu);
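        // e.g. a label bound at offset 0x30, with ip == 0x10 and a 5-byte
        // 'jmp rel32' (E9 + rel32), gives rel32 == 0x30 - 0x10 - 5 == 0x1B.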
4831         goto EmitJmpCallRel;
4832       }
4833       else {
4834         // Non-bound label or label bound to a different section.
4835         if (opCode8 && (!opcode.v || (options & Inst::kOptionShortForm))) {
4836           writer.emit8(opCode8);
4837 
4838           // Record DISP8 (non-bound label).
4839           relOffset = -1;
4840           relSize = 1;
4841           goto EmitRel;
4842         }
4843         else {
4844           // Refuse also 'short' prefix, if specified.
4845           if (ASMJIT_UNLIKELY(!opcode.v || (options & Inst::kOptionShortForm) != 0))
4846             goto InvalidDisplacement;
4847 
4848           writer.emit8If(0x0F, (opcode & Opcode::kMM_Mask) != 0);// Emit 0F prefix.
4849           writer.emit8(opcode.v);                                // Emit opcode.
4850           writer.emit8If(x86EncodeMod(3, opReg, 0), opReg != 0); // Emit MOD.
4851 
4852           // Record DISP32 (non-bound label).
4853           relOffset = -4;
4854           relSize = 4;
4855           goto EmitRel;
4856         }
4857       }
4858     }

    if (rmRel->isImm()) {
      uint64_t baseAddress = code()->baseAddress();
      uint64_t jumpAddress = rmRel->as<Imm>().valueAs<uint64_t>();

      // If the base address is known, calculate a relative displacement and
      // check whether it fits in 32 bits (always true in 32-bit mode). If all
      // checks pass, emit the relative displacement as if it were a bound label.
      if (baseAddress != Globals::kNoBaseAddress) {
        uint64_t rel64 = jumpAddress - (ip + baseAddress) - inst32Size;
        if (Environment::is32Bit(arch()) || Support::isInt32(int64_t(rel64))) {
          rel32 = uint32_t(rel64 & 0xFFFFFFFFu);
          goto EmitJmpCallRel;
        }
        else {
          // The relative displacement exceeds 32 bits - the relocator can only
          // insert a trampoline for jmp/call, not for jcc/jecxz.
          if (ASMJIT_UNLIKELY(!x86IsJmpOrCall(instId)))
            goto InvalidDisplacement;
        }
      }

      err = _code->newRelocEntry(&re, RelocEntry::kTypeAbsToRel);
      if (ASMJIT_UNLIKELY(err))
        goto Failed;

      re->_sourceOffset = offset();
      re->_sourceSectionId = _section->id();
      re->_payload = jumpAddress;

      if (ASMJIT_LIKELY(opcode.v)) {
        // 64-bit: Emit a REX prefix so the instruction can be patched later.
        // The REX prefix does nothing if the instruction is not patched, but it
        // reserves space so the instruction can be patched to a MOD/RM form that
        // points to the memory where the final 64-bit address is stored.
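        //
        // For example, `REX + E9 rel32` (jmp) occupies the same six bytes as
        // `FF 25 disp32` (jmp [rip+disp32]), so the relocator can later rewrite
        // the instruction to load the target from the address table entry added
        // below. This only sketches the assumed patching scheme - the rewrite
        // itself happens during relocation, not here.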
        if (Environment::is64Bit(arch()) && x86IsJmpOrCall(instId)) {
          if (!rex)
            writer.emit8(kX86ByteRex);

          err = _code->addAddressToAddressTable(jumpAddress);
          if (ASMJIT_UNLIKELY(err))
            goto Failed;

          re->_relocType = RelocEntry::kTypeX64AddressEntry;
        }

        writer.emit8If(0x0F, (opcode & Opcode::kMM_Mask) != 0);  // Emit 0F prefix.
        writer.emit8(opcode.v);                                  // Emit opcode.
        writer.emit8If(x86EncodeMod(3, opReg, 0), opReg != 0);   // Emit MOD.
        re->_format.resetToDataValue(4);
        re->_format.setLeadingAndTrailingSize(writer.offsetFrom(_bufferPtr), immSize);
        writer.emit32uLE(0);                                     // Emit DISP32.
      }
      else {
        writer.emit8(opCode8);                                   // Emit opcode.
        re->_format.resetToDataValue(4);
        re->_format.setLeadingAndTrailingSize(writer.offsetFrom(_bufferPtr), immSize);
        writer.emit8(0);                                         // Emit DISP8 (zero).
      }
      goto EmitDone;
    }

    // Not Label|Imm -> Invalid.
    goto InvalidInstruction;

    // Emit jmp/call with relative displacement known at assembly-time. Decide
    // between 8-bit and 32-bit displacement encoding. Some instructions only
    // allow either 8-bit or 32-bit encoding, others allow both encodings.
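    //
    // Note that `rel32` was computed relative to the end of the 32-bit form, so
    // the short form has to add back the size difference. For example, a Jcc
    // (inst32Size == 6, inst8Size == 2) whose target lies 16 bytes past the end
    // of the 32-bit form gets rel32 == 16 and encodes DISP8 as 16 + 6 - 2 == 20.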
EmitJmpCallRel:
    if (Support::isInt8(int32_t(rel32 + inst32Size - inst8Size)) && opCode8 && !(options & Inst::kOptionLongForm)) {
      options |= Inst::kOptionShortForm;
      writer.emit8(opCode8);                                     // Emit opcode.
      writer.emit8(rel32 + inst32Size - inst8Size);              // Emit DISP8.
      goto EmitDone;
    }
    else {
      if (ASMJIT_UNLIKELY(!opcode.v || (options & Inst::kOptionShortForm) != 0))
        goto InvalidDisplacement;

      options &= ~Inst::kOptionShortForm;
      writer.emit8If(0x0F, (opcode & Opcode::kMM_Mask) != 0);    // Emit 0x0F prefix.
      writer.emit8(opcode.v);                                    // Emit opcode.
      writer.emit8If(x86EncodeMod(3, opReg, 0), opReg != 0);     // Emit MOD.
      writer.emit32uLE(rel32);                                   // Emit DISP32.
      goto EmitDone;
    }
  }

  // --------------------------------------------------------------------------
  // [Emit - Relative]
  // --------------------------------------------------------------------------

EmitRel:
  {
    ASMJIT_ASSERT(relSize == 1 || relSize == 4);

    // Chain with label.
    size_t offset = size_t(writer.offsetFrom(_bufferData));
    OffsetFormat of;
    of.resetToDataValue(relSize);

    LabelLink* link = _code->newLabelLink(label, _section->id(), offset, relOffset, of);
    if (ASMJIT_UNLIKELY(!link))
      goto OutOfMemory;

    if (re)
      link->relocId = re->id();

    // Emit dummy zeros; they will be patched once the reference becomes known.
    writer.emitZeros(relSize);
  }
  writer.emitImmediate(uint64_t(immValue), immSize);

  // --------------------------------------------------------------------------
  // [Done]
  // --------------------------------------------------------------------------

EmitDone:
  if (ASMJIT_UNLIKELY(options & Inst::kOptionReserved)) {
#ifndef ASMJIT_NO_LOGGING
    if (_logger)
      EmitterUtils::logInstructionEmitted(this, instId, options, o0, o1, o2, opExt, relSize, immSize, writer.cursor());
#endif
  }

  resetExtraReg();
  resetInstOptions();
  resetInlineComment();

  writer.done(this);
  return kErrorOk;

  // --------------------------------------------------------------------------
  // [Error Handler]
  // --------------------------------------------------------------------------

#define ERROR_HANDLER(ERR) ERR: err = DebugUtils::errored(kError##ERR); goto Failed;
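// Each handler below expands to a `<Name>:` label that records the matching
// `kError<Name>` code into `err` and jumps to the common `Failed` path.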
  ERROR_HANDLER(OutOfMemory)
  ERROR_HANDLER(InvalidLabel)
  ERROR_HANDLER(InvalidInstruction)
  ERROR_HANDLER(InvalidLockPrefix)
  ERROR_HANDLER(InvalidXAcquirePrefix)
  ERROR_HANDLER(InvalidXReleasePrefix)
  ERROR_HANDLER(InvalidRepPrefix)
  ERROR_HANDLER(InvalidRexPrefix)
  ERROR_HANDLER(InvalidEROrSAE)
  ERROR_HANDLER(InvalidAddress)
  ERROR_HANDLER(InvalidAddressIndex)
  ERROR_HANDLER(InvalidAddress64Bit)
  ERROR_HANDLER(InvalidDisplacement)
  ERROR_HANDLER(InvalidPhysId)
  ERROR_HANDLER(InvalidSegment)
  ERROR_HANDLER(InvalidImmediate)
  ERROR_HANDLER(OperandSizeMismatch)
  ERROR_HANDLER(AmbiguousOperandSize)
  ERROR_HANDLER(NotConsecutiveRegs)
#undef ERROR_HANDLER

Failed:
#ifndef ASMJIT_NO_LOGGING
  return EmitterUtils::logInstructionFailed(this, err, instId, options, o0, o1, o2, opExt);
#else
  resetExtraReg();
  resetInstOptions();
  resetInlineComment();
  return reportError(err);
#endif
}

// ============================================================================
// [asmjit::x86::Assembler - Align]
// ============================================================================

Error Assembler::align(uint32_t alignMode, uint32_t alignment) {
  if (ASMJIT_UNLIKELY(!_code))
    return reportError(DebugUtils::errored(kErrorNotInitialized));

  if (ASMJIT_UNLIKELY(alignMode >= kAlignCount))
    return reportError(DebugUtils::errored(kErrorInvalidArgument));

  if (alignment <= 1)
    return kErrorOk;

  if (ASMJIT_UNLIKELY(!Support::isPowerOf2(alignment) || alignment > Globals::kMaxAlignment))
    return reportError(DebugUtils::errored(kErrorInvalidArgument));

  uint32_t i = uint32_t(Support::alignUpDiff<size_t>(offset(), alignment));
  if (i > 0) {
    CodeWriter writer(this);
    ASMJIT_PROPAGATE(writer.ensureSpace(this, i));

    uint8_t pattern = 0x00;
    switch (alignMode) {
      case kAlignCode: {
        if (hasEncodingOption(kEncodingOptionOptimizedAlign)) {
          // Intel 64 and IA-32 Architectures Software Developer's Manual - Volume 2B (NOP).
          enum { kMaxNopSize = 9 };

          static const uint8_t nopData[kMaxNopSize][kMaxNopSize] = {
            { 0x90 },
            { 0x66, 0x90 },
            { 0x0F, 0x1F, 0x00 },
            { 0x0F, 0x1F, 0x40, 0x00 },
            { 0x0F, 0x1F, 0x44, 0x00, 0x00 },
            { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 },
            { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 },
            { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
            { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }
          };

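          // Fill the gap using the largest NOP first. For example, a 12-byte
          // gap is encoded as a 9-byte NOP followed by a 3-byte NOP.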
          do {
            uint32_t n = Support::min<uint32_t>(i, kMaxNopSize);
            const uint8_t* src = nopData[n - 1];

            i -= n;
            do {
              writer.emit8(*src++);
            } while (--n);
          } while (i);
        }

        pattern = 0x90;
        break;
      }

      case kAlignData:
        pattern = 0xCC;
        break;

      case kAlignZero:
        // Pattern already set to zero.
        break;
    }

    while (i) {
      writer.emit8(pattern);
      i--;
    }

    writer.done(this);
  }

#ifndef ASMJIT_NO_LOGGING
  if (_logger) {
    StringTmp<128> sb;
    sb.appendChars(' ', _logger->indentation(FormatOptions::kIndentationCode));
    sb.appendFormat("align %u\n", alignment);
    _logger->log(sb);
  }
#endif

  return kErrorOk;
}
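
// Usage sketch (illustrative only, not part of the assembler): a caller would
// typically align the buffer before binding a label that starts a hot loop.
// The snippet assumes an already initialized CodeHolder named `code`:
//
//   x86::Assembler a(&code);
//   Label loop = a.newLabel();
//   a.align(kAlignCode, 16);   // Pads with multi-byte NOPs when the
//                              // kEncodingOptionOptimizedAlign option is set.
//   a.bind(loop);
//   // ... loop body ...
//   a.jnz(loop);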

// ============================================================================
// [asmjit::x86::Assembler - Events]
// ============================================================================

Error Assembler::onAttach(CodeHolder* code) noexcept {
  uint32_t arch = code->arch();
  if (!Environment::isFamilyX86(arch))
    return DebugUtils::errored(kErrorInvalidArch);

  ASMJIT_PROPAGATE(Base::onAttach(code));

  if (Environment::is32Bit(arch)) {
    // 32-bit architecture - X86.
    _forcedInstOptions |= Inst::_kOptionInvalidRex;
    _setAddressOverrideMask(kX86MemInfo_67H_X86);
  }
  else {
    // 64-bit architecture - X64.
    _forcedInstOptions &= ~Inst::_kOptionInvalidRex;
    _setAddressOverrideMask(kX86MemInfo_67H_X64);
  }

  return kErrorOk;
}

Error Assembler::onDetach(CodeHolder* code) noexcept {
  _forcedInstOptions &= ~Inst::_kOptionInvalidRex;
  _setAddressOverrideMask(0);

  return Base::onDetach(code);
}

ASMJIT_END_SUB_NAMESPACE

#endif // ASMJIT_BUILD_X86