1 // AsmJit - Machine code generation for C++
2 //
3 // * Official AsmJit Home Page: https://asmjit.com
4 // * Official Github Repository: https://github.com/asmjit/asmjit
5 //
6 // Copyright (c) 2008-2020 The AsmJit Authors
7 //
8 // This software is provided 'as-is', without any express or implied
9 // warranty. In no event will the authors be held liable for any damages
10 // arising from the use of this software.
11 //
12 // Permission is granted to anyone to use this software for any purpose,
13 // including commercial applications, and to alter it and redistribute it
14 // freely, subject to the following restrictions:
15 //
16 // 1. The origin of this software must not be misrepresented; you must not
17 // claim that you wrote the original software. If you use this software
18 // in a product, an acknowledgment in the product documentation would be
19 // appreciated but is not required.
20 // 2. Altered source versions must be plainly marked as such, and must not be
21 // misrepresented as being the original software.
22 // 3. This notice may not be removed or altered from any source distribution.
23
24 #include "../core/api-build_p.h"
25 #ifdef ASMJIT_BUILD_X86
26
27 #include "../core/assembler.h"
28 #include "../core/codebufferwriter_p.h"
29 #include "../core/cpuinfo.h"
30 #include "../core/emitterutils_p.h"
31 #include "../core/formatter.h"
32 #include "../core/logger.h"
33 #include "../core/misc_p.h"
34 #include "../core/support.h"
35 #include "../x86/x86assembler.h"
36 #include "../x86/x86instdb_p.h"
37 #include "../x86/x86formatter_p.h"
38 #include "../x86/x86opcode_p.h"
39
40 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
41
42 // ============================================================================
43 // [TypeDefs]
44 // ============================================================================
45
46 typedef Support::FastUInt8 FastUInt8;
47
48 // ============================================================================
49 // [Constants]
50 // ============================================================================
51
//! X86 bytes used to encode important prefixes.
enum X86Byte : uint32_t {
  //! 1-byte REX prefix mask.
  kX86ByteRex = 0x40,

  //! 1-byte REX.W component.
  kX86ByteRexW = 0x08,

  //! Marker bit signalling that a REX prefix must not be emitted. It's or-ed
  //! into the accumulated REX data; if any REX component bit is set together
  //! with this marker the combination is rejected by `x86IsRexInvalid()`
  //! (used for 32-bit mode and for AH|BH|CH|DH registers, see FIXUP_GPB).
  kX86ByteInvalidRex = 0x80,

  //! 2-byte VEX prefix:
  //!   - `[0]` - `0xC5`.
  //!   - `[1]` - `RvvvvLpp`.
  kX86ByteVex2 = 0xC5,

  //! 3-byte VEX prefix:
  //!   - `[0]` - `0xC4`.
  //!   - `[1]` - `RXBmmmmm`.
  //!   - `[2]` - `WvvvvLpp`.
  kX86ByteVex3 = 0xC4,

  //! 3-byte XOP prefix:
  //!   - `[0]` - `0x8F`.
  //!   - `[1]` - `RXBmmmmm`.
  //!   - `[2]` - `WvvvvLpp`.
  kX86ByteXop3 = 0x8F,

  //! 4-byte EVEX prefix:
  //!   - `[0]` - `0x62`.
  //!   - `[1]` - Payload0 or `P[ 7: 0]` - `[R X B R' 0 0 m m]`.
  //!   - `[2]` - Payload1 or `P[15: 8]` - `[W v v v v 1 p p]`.
  //!   - `[3]` - Payload2 or `P[23:16]` - `[z L' L b V' a a a]`.
  //!
  //! Payload:
  //!   - `P[ 1: 0]` - OPCODE: EVEX.mmmmm, only lowest 2 bits [1:0] used.
  //!   - `P[ 3: 2]` - ______: Must be 0.
  //!   - `P[    4]` - REG-ID: EVEX.R' - 5th bit of 'RRRRR'.
  //!   - `P[    5]` - REG-ID: EVEX.B - 4th bit of 'BBBBB'.
  //!   - `P[    6]` - REG-ID: EVEX.X - 5th bit of 'BBBBB' or 4th bit of 'XXXX' (with SIB).
  //!   - `P[    7]` - REG-ID: EVEX.R - 4th bit of 'RRRRR'.
  //!   - `P[ 9: 8]` - OPCODE: EVEX.pp.
  //!   - `P[   10]` - ______: Must be 1.
  //!   - `P[14:11]` - REG-ID: 4 bits of 'VVVV'.
  //!   - `P[   15]` - OPCODE: EVEX.W.
  //!   - `P[18:16]` - REG-ID: K register k0...k7 (Merging/Zeroing Vector Ops).
  //!   - `P[   19]` - REG-ID: 5th bit of 'VVVVV'.
  //!   - `P[   20]` - OPCODE: Broadcast/Rounding Control/SAE bit.
  //!   - `P[22:21]` - OPCODE: Vector Length (L' and L) / Rounding Control.
  //!   - `P[   23]` - OPCODE: Zeroing/Merging.
  kX86ByteEvex = 0x62
};
103
// AsmJit specific (used to encode VVVVV field in XOP/VEX/EVEX).
enum VexVVVVV : uint32_t {
  kVexVVVVVShift = 7,                     // Bit position of VVVVV in a packed reg id (see x86PackRegAndVvvvv).
  kVexVVVVVMask = 0x1F << kVexVVVVVShift  // 5-bit VVVVV mask at that position.
};
109
//! Instruction 2-byte/3-byte opcode prefix definition.
struct X86OpcodeMM {
  uint8_t size;    // Number of escape bytes stored in `data` (0..2).
  uint8_t data[3]; // Escape bytes emitted before the primary opcode byte.
};
115
//! Mandatory prefixes used to encode legacy [66, F3, F2] or [9B] byte.
//!
//! Indexed by the opcode's PP field; index 7 maps to the FPU's 9B (FWAIT)
//! prefix, which is selectable because `emitPP()` masks with `kPP_FPUMask`.
static const uint8_t x86OpcodePP[8] = { 0x00, 0x66, 0xF3, 0xF2, 0x00, 0x00, 0x00, 0x9B };
118
//! Instruction 2-byte/3-byte opcode prefix data.
//!
//! Indexed by the opcode's MM field; each entry holds the opcode-map escape
//! bytes (`data`) and how many of them are valid (`size`). Entries #05..#0F
//! are unused placeholders.
static const X86OpcodeMM x86OpcodeMM[] = {
  { 0, { 0x00, 0x00, 0 } }, // #00 (0b0000).
  { 1, { 0x0F, 0x00, 0 } }, // #01 (0b0001).
  { 2, { 0x0F, 0x38, 0 } }, // #02 (0b0010).
  { 2, { 0x0F, 0x3A, 0 } }, // #03 (0b0011).
  { 2, { 0x0F, 0x01, 0 } }, // #04 (0b0100).
  { 0, { 0x00, 0x00, 0 } }, // #05 (0b0101).
  { 0, { 0x00, 0x00, 0 } }, // #06 (0b0110).
  { 0, { 0x00, 0x00, 0 } }, // #07 (0b0111).
  { 0, { 0x00, 0x00, 0 } }, // #08 (0b1000).
  { 0, { 0x00, 0x00, 0 } }, // #09 (0b1001).
  { 0, { 0x00, 0x00, 0 } }, // #0A (0b1010).
  { 0, { 0x00, 0x00, 0 } }, // #0B (0b1011).
  { 0, { 0x00, 0x00, 0 } }, // #0C (0b1100).
  { 0, { 0x00, 0x00, 0 } }, // #0D (0b1101).
  { 0, { 0x00, 0x00, 0 } }, // #0E (0b1110).
  { 0, { 0x00, 0x00, 0 } }  // #0F (0b1111).
};
138
//! Segment-override prefix bytes, indexed by segment id (0 means no override).
//! Index 7 is zero-initialized (unused).
static const uint8_t x86SegmentPrefix[8] = {
  0x00, // None.
  0x26, // ES.
  0x2E, // CS.
  0x36, // SS.
  0x3E, // DS.
  0x64, // FS.
  0x65  // GS.
};
148
//! PUSH-sreg opcodes, indexed by segment id. FS/GS require the 0F escape map.
static const uint32_t x86OpcodePushSReg[8] = {
  Opcode::k000000 | 0x00, // None.
  Opcode::k000000 | 0x06, // Push ES.
  Opcode::k000000 | 0x0E, // Push CS.
  Opcode::k000000 | 0x16, // Push SS.
  Opcode::k000000 | 0x1E, // Push DS.
  Opcode::k000F00 | 0xA0, // Push FS.
  Opcode::k000F00 | 0xA8  // Push GS.
};
158
//! POP-sreg opcodes, indexed by segment id. The CS entry is a zero
//! placeholder (POP CS is not encodable); FS/GS require the 0F escape map.
static const uint32_t x86OpcodePopSReg[8]  = {
  Opcode::k000000 | 0x00, // None.
  Opcode::k000000 | 0x07, // Pop ES.
  Opcode::k000000 | 0x00, // Pop CS.
  Opcode::k000000 | 0x17, // Pop SS.
  Opcode::k000000 | 0x1F, // Pop DS.
  Opcode::k000F00 | 0xA1, // Pop FS.
  Opcode::k000F00 | 0xA9  // Pop GS.
};
168
169 // ============================================================================
170 // [asmjit::X86MemInfo | X86VEXPrefix | X86LLByRegType | X86CDisp8Table]
171 // ============================================================================
172
173 //! Memory operand's info bits.
174 //!
175 //! A lookup table that contains various information based on the BASE and INDEX
176 //! information of a memory operand. This is much better and safer than playing
// with IFs in the code and can check for errors much faster and better.
enum X86MemInfo_Enum {
  kX86MemInfo_0         = 0x00, //!< No BASE/INDEX information.

  kX86MemInfo_BaseGp    = 0x01, //!< Has BASE reg, REX.B can be 1, compatible with REX.B byte.
  kX86MemInfo_Index     = 0x02, //!< Has INDEX reg, REX.X can be 1, compatible with REX.X byte.

  kX86MemInfo_BaseLabel = 0x10, //!< Base is Label.
  kX86MemInfo_BaseRip   = 0x20, //!< Base is RIP.

  kX86MemInfo_67H_X86   = 0x40, //!< Address-size override in 32-bit mode.
  kX86MemInfo_67H_X64   = 0x80, //!< Address-size override in 64-bit mode.
  kX86MemInfo_67H_Mask  = 0xC0  //!< Contains all address-size override bits.
};
191
//! Computes `X86MemInfo_Enum` flags for one BASE/INDEX register-type
//! combination. `X` packs the BASE register type in bits [4:0] and the
//! INDEX register type in bits [9:5]; the results are instantiated into
//! the `x86MemInfo[]` lookup table below.
template<uint32_t X>
struct X86MemInfo_T {
  enum {
    B = (X     ) & 0x1F, // BASE register type (or Label tag).
    I = (X >> 5) & 0x1F, // INDEX register type.

    // Classify BASE: a GP register, RIP, or a Label.
    kBase  = (B >= Reg::kTypeGpw && B <= Reg::kTypeGpq ) ? kX86MemInfo_BaseGp    :
             (B == Reg::kTypeRip                       ) ? kX86MemInfo_BaseRip   :
             (B == Label::kLabelTag                    ) ? kX86MemInfo_BaseLabel : 0,

    // INDEX can be a GP register or a vector register (XMM..ZMM, i.e. VSIB).
    kIndex = (I >= Reg::kTypeGpw && I <= Reg::kTypeGpq ) ? kX86MemInfo_Index     :
             (I >= Reg::kTypeXmm && I <= Reg::kTypeZmm ) ? kX86MemInfo_Index     : 0,

    // Whether the combination requires a 67H address-size override, and in
    // which target mode (16-bit regs in 32-bit mode, 32-bit regs in 64-bit
    // mode).
    k67H   = (B == Reg::kTypeGpw  && I == Reg::kTypeNone) ? kX86MemInfo_67H_X86 :
             (B == Reg::kTypeGpd  && I == Reg::kTypeNone) ? kX86MemInfo_67H_X64 :
             (B == Reg::kTypeNone && I == Reg::kTypeGpw ) ? kX86MemInfo_67H_X86 :
             (B == Reg::kTypeNone && I == Reg::kTypeGpd ) ? kX86MemInfo_67H_X64 :
             (B == Reg::kTypeGpw  && I == Reg::kTypeGpw ) ? kX86MemInfo_67H_X86 :
             (B == Reg::kTypeGpd  && I == Reg::kTypeGpd ) ? kX86MemInfo_67H_X64 :
             (B == Reg::kTypeGpw  && I == Reg::kTypeXmm ) ? kX86MemInfo_67H_X86 :
             (B == Reg::kTypeGpd  && I == Reg::kTypeXmm ) ? kX86MemInfo_67H_X64 :
             (B == Reg::kTypeGpw  && I == Reg::kTypeYmm ) ? kX86MemInfo_67H_X86 :
             (B == Reg::kTypeGpd  && I == Reg::kTypeYmm ) ? kX86MemInfo_67H_X64 :
             (B == Reg::kTypeGpw  && I == Reg::kTypeZmm ) ? kX86MemInfo_67H_X86 :
             (B == Reg::kTypeGpd  && I == Reg::kTypeZmm ) ? kX86MemInfo_67H_X64 :
             (B == Label::kLabelTag && I == Reg::kTypeGpw) ? kX86MemInfo_67H_X86 :
             (B == Label::kLabelTag && I == Reg::kTypeGpd) ? kX86MemInfo_67H_X64 : 0,

    // NOTE(review): bits 0x04 and 0x08 are or-ed in unconditionally and have
    // no name in `X86MemInfo_Enum` above - presumably REX-related bits the
    // emitter expects to be set; confirm against the users of `x86MemInfo[]`.
    kValue = kBase | kIndex | k67H | 0x04 | 0x08
  };
};
223
224 // The result stored in the LUT is a combination of
225 // - 67H - Address override prefix - depends on BASE+INDEX register types and
226 // the target architecture.
227 // - REX - A possible combination of REX.[B|X|R|W] bits in REX prefix where
228 // REX.B and REX.X are possibly masked out, but REX.R and REX.W are
229 // kept as is.
230 #define VALUE(X) X86MemInfo_T<X>::kValue
231 static const uint8_t x86MemInfo[] = { ASMJIT_LOOKUP_TABLE_1024(VALUE, 0) };
232 #undef VALUE
233
234 // VEX3 or XOP xor bits applied to the opcode before emitted. The index to this
235 // table is 'mmmmm' value, which contains all we need. This is only used by a
236 // 3 BYTE VEX and XOP prefixes, 2 BYTE VEX prefix is handled differently. The
237 // idea is to minimize the difference between VEX3 vs XOP when encoding VEX
238 // or XOP instruction. This should minimize the code required to emit such
239 // instructions and should also make it faster as we don't need any branch to
240 // decide between VEX3 vs XOP.
241 // ____ ___
242 // [_OPCODE_|WvvvvLpp|RXBmmmmm|VEX3_XOP]
243 #define VALUE(X) ((X & 0x08) ? kX86ByteXop3 : kX86ByteVex3) | (0xF << 19) | (0x7 << 13)
244 static const uint32_t x86VEXPrefix[] = { ASMJIT_LOOKUP_TABLE_16(VALUE, 0) };
245 #undef VALUE
246
247 // Table that contains LL opcode field addressed by a register size / 16. It's
248 // used to propagate L.256 or L.512 when YMM or ZMM registers are used,
249 // respectively.
250 #define VALUE(X) (X & (64 >> 4)) ? Opcode::kLL_2 : \
251 (X & (32 >> 4)) ? Opcode::kLL_1 : Opcode::kLL_0
252 static const uint32_t x86LLBySizeDiv16[] = { ASMJIT_LOOKUP_TABLE_16(VALUE, 0) };
253 #undef VALUE
254
// Table that contains LL opcode field addressed by a register type. It's
// used to propagate L.256 or L.512 when YMM or ZMM registers are used,
// respectively.
258 #define VALUE(X) X == Reg::kTypeZmm ? Opcode::kLL_2 : \
259 X == Reg::kTypeYmm ? Opcode::kLL_1 : Opcode::kLL_0
260 static const uint32_t x86LLByRegType[] = { ASMJIT_LOOKUP_TABLE_16(VALUE, 0) };
261 #undef VALUE
262
// Table that contains a scale (shift left) based on 'TTWLL' field and
// the instruction's tuple-type (TT) field. The scale is then applied to
// the BASE-N stored in each opcode to calculate the final compressed
// displacement used by all EVEX encoded instructions.
template<uint32_t X>
struct X86CDisp8SHL_T {
  enum {
    TT = (X >> 3) << Opcode::kCDTT_Shift, // Tuple-type, moved to its opcode bit position.
    LL = (X >> 0) & 0x3,                  // Vector-length selector (LL).
    W  = (X >> 2) & 0x1,                  // EVEX.W bit.

    // Shift amount (log2 of the disp8 scale), stored in the opcode's CDSHL field.
    kValue = (TT == Opcode::kCDTT_None ? ((LL==0) ? 0 : (LL==1) ? 0   : 0  ) :
              TT == Opcode::kCDTT_ByLL ? ((LL==0) ? 0 : (LL==1) ? 1   : 2  ) :
              TT == Opcode::kCDTT_T1W  ? ((LL==0) ? W : (LL==1) ? 1+W : 2+W) :
              TT == Opcode::kCDTT_DUP  ? ((LL==0) ? 0 : (LL==1) ? 2   : 3  ) : 0) << Opcode::kCDSHL_Shift
  };
};
280
281 #define VALUE(X) X86CDisp8SHL_T<X>::kValue
282 static const uint32_t x86CDisp8SHL[] = { ASMJIT_LOOKUP_TABLE_32(VALUE, 0) };
283 #undef VALUE
284
// Table that contains MOD byte of a 16-bit [BASE + disp] address.
// 0xFF == Invalid.
//
// The index is a GP register id (AX..DI); the value is the R/M field of the
// ModRM byte that selects that base in 16-bit addressing.
static const uint8_t x86Mod16BaseTable[8] = {
  0xFF, // AX -> N/A.
  0xFF, // CX -> N/A.
  0xFF, // DX -> N/A.
  0x07, // BX -> 111.
  0xFF, // SP -> N/A.
  0x06, // BP -> 110.
  0x04, // SI -> 100.
  0x05  // DI -> 101.
};
297
// Table that contains MOD byte of a 16-bit [BASE + INDEX + disp] combination.
// 0xFF == Invalid.
template<uint32_t X>
struct X86Mod16BaseIndexTable_T {
  enum {
    B = X >> 3,  // BASE register id (Gp::kId...).
    I = X & 0x7, // INDEX register id.

    // Only BX|BP combined with SI|DI are encodable in 16-bit addressing; the
    // BASE/INDEX order is interchangeable. Everything else maps to 0xFF.
    kValue = ((B == Gp::kIdBx && I == Gp::kIdSi) || (B == Gp::kIdSi && I == Gp::kIdBx)) ? 0x00 :
             ((B == Gp::kIdBx && I == Gp::kIdDi) || (B == Gp::kIdDi && I == Gp::kIdBx)) ? 0x01 :
             ((B == Gp::kIdBp && I == Gp::kIdSi) || (B == Gp::kIdSi && I == Gp::kIdBp)) ? 0x02 :
             ((B == Gp::kIdBp && I == Gp::kIdDi) || (B == Gp::kIdDi && I == Gp::kIdBp)) ? 0x03 : 0xFF
  };
};
312
313 #define VALUE(X) X86Mod16BaseIndexTable_T<X>::kValue
314 static const uint8_t x86Mod16BaseIndexTable[] = { ASMJIT_LOOKUP_TABLE_64(VALUE, 0) };
315 #undef VALUE
316
317 // ============================================================================
318 // [asmjit::x86::Assembler - Helpers]
319 // ============================================================================
320
x86IsJmpOrCall(uint32_t instId)321 static ASMJIT_INLINE bool x86IsJmpOrCall(uint32_t instId) noexcept {
322 return instId == Inst::kIdJmp || instId == Inst::kIdCall;
323 }
324
x86IsImplicitMem(const Operand_ & op,uint32_t base)325 static ASMJIT_INLINE bool x86IsImplicitMem(const Operand_& op, uint32_t base) noexcept {
326 return op.isMem() && op.as<Mem>().baseId() == base && !op.as<Mem>().hasOffset();
327 }
328
329 //! Combine `regId` and `vvvvvId` into a single value (used by AVX and AVX-512).
x86PackRegAndVvvvv(uint32_t regId,uint32_t vvvvvId)330 static ASMJIT_INLINE uint32_t x86PackRegAndVvvvv(uint32_t regId, uint32_t vvvvvId) noexcept {
331 return regId + (vvvvvId << kVexVVVVVShift);
332 }
333
//! Returns the LL opcode field derived from the (vector) INDEX register type
//! of a VSIB memory operand.
static ASMJIT_INLINE uint32_t x86OpcodeLByVMem(const Operand_& op) noexcept {
  return x86LLByRegType[op.as<Mem>().indexType()];
}
337
//! Returns the LL opcode field derived from an operand size in bytes
//! (32 -> L.256, 64 -> L.512, anything smaller -> L.128).
static ASMJIT_INLINE uint32_t x86OpcodeLBySize(uint32_t size) noexcept {
  return x86LLBySizeDiv16[size / 16];
}
341
342 //! Encode MOD byte.
x86EncodeMod(uint32_t m,uint32_t o,uint32_t rm)343 static ASMJIT_INLINE uint32_t x86EncodeMod(uint32_t m, uint32_t o, uint32_t rm) noexcept {
344 ASMJIT_ASSERT(m <= 3);
345 ASMJIT_ASSERT(o <= 7);
346 ASMJIT_ASSERT(rm <= 7);
347 return (m << 6) + (o << 3) + rm;
348 }
349
350 //! Encode SIB byte.
x86EncodeSib(uint32_t s,uint32_t i,uint32_t b)351 static ASMJIT_INLINE uint32_t x86EncodeSib(uint32_t s, uint32_t i, uint32_t b) noexcept {
352 ASMJIT_ASSERT(s <= 3);
353 ASMJIT_ASSERT(i <= 7);
354 ASMJIT_ASSERT(b <= 7);
355 return (s << 6) + (i << 3) + b;
356 }
357
//! Tests whether the accumulated `rex` data describes an invalid REX prefix,
//! i.e. REX component bits present together with `kX86ByteInvalidRex`.
static ASMJIT_INLINE bool x86IsRexInvalid(uint32_t rex) noexcept {
  // Validates the following possibilities:
  //   REX == 0x00      -> OKAY (X86_32 / X86_64).
  //   REX == 0x40-0x4F -> OKAY (X86_64).
  //   REX == 0x80      -> OKAY (X86_32 mode, rex prefix not used).
  //   REX == 0x81-0xCF -> BAD  (X86_32 mode, rex prefix used).
  return rex > kX86ByteInvalidRex;
}
366
//! Sign-extends the low 32 bits of `imm` to 64 bits and returns the result
//! converted back to `T`.
template<typename T>
static constexpr T x86SignExtendI32(T imm) noexcept {
  return static_cast<T>(static_cast<int64_t>(static_cast<int32_t>(imm & T(0xFFFFFFFF))));
}
369
//! Returns the alternative (secondary) opcode of the given instruction, looked
//! up through the instruction's alt-opcode index.
static ASMJIT_INLINE uint32_t x86AltOpcodeOf(const InstDB::InstInfo* info) noexcept {
  return InstDB::_altOpcodeTable[info->_altOpcodeIndex];
}
373
374 // ============================================================================
375 // [asmjit::X86BufferWriter]
376 // ============================================================================
377
//! CodeBufferWriter specialized for emitting X86 prefixes, opcode escapes,
//! and immediate values into the code buffer.
class X86BufferWriter : public CodeBufferWriter {
public:
  ASMJIT_INLINE explicit X86BufferWriter(Assembler* a) noexcept
    : CodeBufferWriter(a) {}

  //! Emits a mandatory prefix byte [66|F3|F2|9B] if the opcode requires one.
  ASMJIT_INLINE void emitPP(uint32_t opcode) noexcept {
    // Masking with kPP_FPUMask keeps the FPU 9B prefix (index 7) selectable.
    uint32_t ppIndex = (opcode >> Opcode::kPP_Shift) &
                       (Opcode::kPP_FPUMask >> Opcode::kPP_Shift) ;
    emit8If(x86OpcodePP[ppIndex], ppIndex != 0);
  }

  //! Emits the opcode-map escape bytes (if any) selected by the opcode's MM
  //! field, followed by the primary opcode byte.
  ASMJIT_INLINE void emitMMAndOpcode(uint32_t opcode) noexcept {
    uint32_t mmIndex = (opcode & Opcode::kMM_Mask) >> Opcode::kMM_Shift;
    const X86OpcodeMM& mmCode = x86OpcodeMM[mmIndex];

    emit8If(mmCode.data[0], mmCode.size > 0);
    emit8If(mmCode.data[1], mmCode.size > 1);
    emit8(opcode);
  }

  //! Emits a segment-override prefix if `segmentId` selects a segment
  //! (id 0 means no override and emits nothing).
  ASMJIT_INLINE void emitSegmentOverride(uint32_t segmentId) noexcept {
    ASMJIT_ASSERT(segmentId < ASMJIT_ARRAY_SIZE(x86SegmentPrefix));

    FastUInt8 prefix = x86SegmentPrefix[segmentId];
    emit8If(prefix, prefix != 0);
  }

  //! Emits the 67H address-size override prefix if `condition` is true.
  template<typename CondT>
  ASMJIT_INLINE void emitAddressOverride(CondT condition) noexcept {
    emit8If(0x67, condition);
  }

  //! Emits an immediate that is known to be either absent, 1 byte, or
  //! 4 bytes (little-endian).
  ASMJIT_INLINE void emitImmByteOrDWord(uint64_t immValue, FastUInt8 immSize) noexcept {
    if (!immSize)
      return;

    ASMJIT_ASSERT(immSize == 1 || immSize == 4);

#if ASMJIT_ARCH_BITS >= 64
    uint64_t imm = uint64_t(immValue);
#else
    // Only the low DWORD can ever be emitted here, so a 32-bit host can
    // safely truncate up-front.
    uint32_t imm = uint32_t(immValue & 0xFFFFFFFFu);
#endif

    // Many instructions just use a single byte immediate, so make it fast.
    emit8(imm & 0xFFu);
    if (immSize == 1) return;

    imm >>= 8;
    emit8(imm & 0xFFu);
    imm >>= 8;
    emit8(imm & 0xFFu);
    imm >>= 8;
    emit8(imm & 0xFFu);
  }

  //! Emits an immediate of 1, 2, 4, or 8 bytes (little-endian).
  ASMJIT_INLINE void emitImmediate(uint64_t immValue, FastUInt8 immSize) noexcept {
    if (!immSize)
      return;

#if ASMJIT_ARCH_BITS >= 64
    uint64_t imm = uint64_t(immValue);
#else
    // Low DWORD is emitted byte-by-byte; the high DWORD (8-byte case) is
    // taken from `immValue` directly at the end.
    uint32_t imm = uint32_t(immValue & 0xFFFFFFFFu);
#endif

    // Many instructions just use a single byte immediate, so make it fast.
    emit8(imm & 0xFFu);
    if (--immSize == 0) return;

    imm >>= 8;
    emit8(imm & 0xFFu);
    if (--immSize == 0) return;

    imm >>= 8;
    emit8(imm & 0xFFu);
    if (--immSize == 0) return;

    imm >>= 8;
    emit8(imm & 0xFFu);
    if (--immSize == 0) return;

    // Can be 1, 2, 4 or 8 bytes, this handles the remaining high DWORD of an 8-byte immediate.
    ASMJIT_ASSERT(immSize == 4);

#if ASMJIT_ARCH_BITS >= 64
    imm >>= 8;
    emit32uLE(uint32_t(imm));
#else
    emit32uLE(uint32_t((uint64_t(immValue) >> 32) & 0xFFFFFFFFu));
#endif
  }
};
471
472 // If the operand is BPL|SPL|SIL|DIL|R8B-15B
473 // - Force REX prefix
474 // If the operand is AH|BH|CH|DH
475 // - patch its index from 0..3 to 4..7 as encoded by X86.
476 // - Disallow REX prefix.
477 #define FIXUP_GPB(REG_OP, REG_ID) \
478 do { \
479 if (!static_cast<const Gp&>(REG_OP).isGpbHi()) { \
480 options |= (REG_ID >= 4) ? uint32_t(Inst::kOptionRex) \
481 : uint32_t(0); \
482 } \
483 else { \
484 options |= Inst::_kOptionInvalidRex; \
485 REG_ID += 4; \
486 } \
487 } while (0)
488
489 #define ENC_OPS1(OP0) ((Operand::kOp##OP0))
490 #define ENC_OPS2(OP0, OP1) ((Operand::kOp##OP0) + ((Operand::kOp##OP1) << 3))
491 #define ENC_OPS3(OP0, OP1, OP2) ((Operand::kOp##OP0) + ((Operand::kOp##OP1) << 3) + ((Operand::kOp##OP2) << 6))
492 #define ENC_OPS4(OP0, OP1, OP2, OP3) ((Operand::kOp##OP0) + ((Operand::kOp##OP1) << 3) + ((Operand::kOp##OP2) << 6) + ((Operand::kOp##OP3) << 9))
493
494 // ============================================================================
495 // [asmjit::x86::Assembler - Movabs Heuristics]
496 // ============================================================================
497
x86GetMovAbsInstSize64Bit(uint32_t regSize,uint32_t options,const Mem & rmRel)498 static ASMJIT_INLINE bool x86GetMovAbsInstSize64Bit(uint32_t regSize, uint32_t options, const Mem& rmRel) noexcept {
499 uint32_t segmentPrefixSize = rmRel.segmentId() != 0;
500 uint32_t _66hPrefixSize = regSize == 2;
501 uint32_t rexPrefixSize = (regSize == 8) || ((options & Inst::kOptionRex) != 0);
502 uint32_t opCodeByteSize = 1;
503 uint32_t immediateSize = 8;
504
505 return segmentPrefixSize + _66hPrefixSize + rexPrefixSize + opCodeByteSize + immediateSize;
506 }
507
// Resolves the final address type of a `movabs` memory operand. An input of
// `kAddrTypeDefault` is promoted to `kAddrTypeAbs` whenever the target cannot
// be reached with a 32-bit (sign-extended) value; otherwise the operand's own
// address type is returned unchanged.
static ASMJIT_INLINE uint32_t x86GetMovAbsAddrType(Assembler* self, X86BufferWriter& writer, uint32_t regSize, uint32_t options, const Mem& rmRel) noexcept {
  uint32_t addrType = rmRel.addrType();
  int64_t addrValue = rmRel.offset();

  if (addrType == BaseMem::kAddrTypeDefault && !(options & Inst::kOptionModMR)) {
    if (self->is64Bit()) {
      uint64_t baseAddress = self->code()->baseAddress();
      if (baseAddress != Globals::kNoBaseAddress && !rmRel.hasSegment()) {
        // Base address known: compute the RIP after this instruction and
        // check whether the target is within 32-bit relative reach.
        uint32_t instructionSize = x86GetMovAbsInstSize64Bit(regSize, options, rmRel);
        uint64_t virtualOffset = uint64_t(writer.offsetFrom(self->_bufferData));
        uint64_t rip64 = baseAddress + self->_section->offset() + virtualOffset + instructionSize;
        uint64_t rel64 = uint64_t(addrValue) - rip64;

        if (!Support::isInt32(int64_t(rel64)))
          addrType = BaseMem::kAddrTypeAbs;
      }
      else {
        // No known base address (or a segment override is present): only the
        // absolute value's 32-bit range can be checked.
        if (!Support::isInt32(addrValue))
          addrType = BaseMem::kAddrTypeAbs;
      }
    }
    else {
      // 32-bit mode always uses absolute addressing here.
      addrType = BaseMem::kAddrTypeAbs;
    }
  }

  return addrType;
}
536
537 // ============================================================================
538 // [asmjit::x86::Assembler - Construction / Destruction]
539 // ============================================================================
540
//! Creates an `x86::Assembler`; when `code` is non-null the assembler is
//! attached to it immediately.
Assembler::Assembler(CodeHolder* code) noexcept : BaseAssembler() {
  if (code)
    code->attach(this);
}

//! Destroys the `x86::Assembler` instance.
Assembler::~Assembler() noexcept {}
546
547 // ============================================================================
548 // [asmjit::x86::Assembler - Emit (Low-Level)]
549 // ============================================================================
550
_emit(uint32_t instId,const Operand_ & o0,const Operand_ & o1,const Operand_ & o2,const Operand_ * opExt)551 ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) {
552 constexpr uint32_t kVSHR_W = Opcode::kW_Shift - 23;
553 constexpr uint32_t kVSHR_PP = Opcode::kPP_Shift - 16;
554 constexpr uint32_t kVSHR_PP_EW = Opcode::kPP_Shift - 16;
555
556 constexpr uint32_t kRequiresSpecialHandling =
557 uint32_t(Inst::kOptionReserved) | // Logging/Validation/Error.
558 uint32_t(Inst::kOptionRep ) | // REP/REPE prefix.
559 uint32_t(Inst::kOptionRepne ) | // REPNE prefix.
560 uint32_t(Inst::kOptionLock ) | // LOCK prefix.
561 uint32_t(Inst::kOptionXAcquire) | // XACQUIRE prefix.
562 uint32_t(Inst::kOptionXRelease) ; // XRELEASE prefix.
563
564 Error err;
565
566 Opcode opcode; // Instruction opcode.
567 uint32_t options; // Instruction options.
568 uint32_t isign3; // A combined signature of first 3 operands.
569
570 const Operand_* rmRel; // Memory operand or operand that holds Label|Imm.
571 uint32_t rmInfo; // Memory operand's info based on x86MemInfo.
572 uint32_t rbReg; // Memory base or modRM register.
573 uint32_t rxReg; // Memory index register.
574 uint32_t opReg; // ModR/M opcode or register id.
575
576 LabelEntry* label; // Label entry.
577 RelocEntry* re = nullptr; // Relocation entry.
578 int32_t relOffset; // Relative offset
579 FastUInt8 relSize = 0; // Relative size.
580 uint8_t* memOpAOMark = nullptr; // Marker that points before 'address-override prefix' is emitted.
581
582 int64_t immValue = 0; // Immediate value (must be 64-bit).
583 FastUInt8 immSize = 0; // Immediate size.
584
585 X86BufferWriter writer(this);
586
587 if (instId >= Inst::_kIdCount)
588 instId = 0;
589
590 const InstDB::InstInfo* instInfo = &InstDB::_instInfoTable[instId];
591 const InstDB::CommonInfo* commonInfo = &instInfo->commonInfo();
592
593 // Signature of the first 3 operands.
594 isign3 = o0.opType() + (o1.opType() << 3) + (o2.opType() << 6);
595
596 // Combine all instruction options and also check whether the instruction
597 // is valid. All options that require special handling (including invalid
598 // instruction) are handled by the next branch.
599 options = uint32_t(instId == 0);
600 options |= uint32_t((size_t)(_bufferEnd - writer.cursor()) < 16);
601 options |= uint32_t(instOptions() | forcedInstOptions());
602
603 // Handle failure and rare cases first.
604 if (ASMJIT_UNLIKELY(options & kRequiresSpecialHandling)) {
605 if (ASMJIT_UNLIKELY(!_code))
606 return reportError(DebugUtils::errored(kErrorNotInitialized));
607
608 // Unknown instruction.
609 if (ASMJIT_UNLIKELY(instId == 0))
610 goto InvalidInstruction;
611
612 // Grow request, happens rarely.
613 err = writer.ensureSpace(this, 16);
614 if (ASMJIT_UNLIKELY(err))
615 goto Failed;
616
617 #ifndef ASMJIT_NO_VALIDATION
618 // Strict validation.
619 if (hasValidationOption(kValidationOptionAssembler)) {
620 Operand_ opArray[Globals::kMaxOpCount];
621 EmitterUtils::opArrayFromEmitArgs(opArray, o0, o1, o2, opExt);
622
623 err = InstAPI::validate(arch(), BaseInst(instId, options, _extraReg), opArray, Globals::kMaxOpCount);
624 if (ASMJIT_UNLIKELY(err))
625 goto Failed;
626 }
627 #endif
628
629 uint32_t iFlags = instInfo->flags();
630
631 // LOCK, XACQUIRE, and XRELEASE prefixes.
632 if (options & Inst::kOptionLock) {
633 bool xAcqRel = (options & (Inst::kOptionXAcquire | Inst::kOptionXRelease)) != 0;
634
635 if (ASMJIT_UNLIKELY(!(iFlags & (InstDB::kFlagLock)) && !xAcqRel))
636 goto InvalidLockPrefix;
637
638 if (xAcqRel) {
639 if (ASMJIT_UNLIKELY((options & Inst::kOptionXAcquire) && !(iFlags & InstDB::kFlagXAcquire)))
640 goto InvalidXAcquirePrefix;
641
642 if (ASMJIT_UNLIKELY((options & Inst::kOptionXRelease) && !(iFlags & InstDB::kFlagXRelease)))
643 goto InvalidXReleasePrefix;
644
645 writer.emit8((options & Inst::kOptionXAcquire) ? 0xF2 : 0xF3);
646 }
647
648 writer.emit8(0xF0);
649 }
650
651 // REP and REPNE prefixes.
652 if (options & (Inst::kOptionRep | Inst::kOptionRepne)) {
653 if (ASMJIT_UNLIKELY(!(iFlags & InstDB::kFlagRep)))
654 goto InvalidRepPrefix;
655
656 if (_extraReg.isReg() && ASMJIT_UNLIKELY(_extraReg.group() != Reg::kGroupGp || _extraReg.id() != Gp::kIdCx))
657 goto InvalidRepPrefix;
658
659 writer.emit8((options & Inst::kOptionRepne) ? 0xF2 : 0xF3);
660 }
661 }
662
663 // This sequence seems to be the fastest.
664 opcode = InstDB::_mainOpcodeTable[instInfo->_mainOpcodeIndex];
665 opReg = opcode.extractO();
666 opcode |= instInfo->_mainOpcodeValue;
667
668 // --------------------------------------------------------------------------
669 // [Encoding Scope]
670 // --------------------------------------------------------------------------
671
672 switch (instInfo->_encoding) {
673 case InstDB::kEncodingNone:
674 goto EmitDone;
675
676 // ------------------------------------------------------------------------
677 // [X86]
678 // ------------------------------------------------------------------------
679
680 case InstDB::kEncodingX86Op:
681 goto EmitX86Op;
682
683 case InstDB::kEncodingX86Op_O_I8:
684 if (ASMJIT_UNLIKELY(isign3 != ENC_OPS1(Imm)))
685 goto InvalidInstruction;
686
687 immValue = o0.as<Imm>().valueAs<uint8_t>();
688 immSize = 1;
689 ASMJIT_FALLTHROUGH;
690
691 case InstDB::kEncodingX86Op_O:
692 rbReg = 0;
693 goto EmitX86R;
694
695 case InstDB::kEncodingX86Op_xAddr:
696 if (ASMJIT_UNLIKELY(!o0.isReg()))
697 goto InvalidInstruction;
698
699 rmInfo = x86MemInfo[o0.as<Reg>().type()];
700 writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0);
701 goto EmitX86Op;
702
703 case InstDB::kEncodingX86Op_xAX:
704 if (isign3 == 0)
705 goto EmitX86Op;
706
707 if (isign3 == ENC_OPS1(Reg) && o0.id() == Gp::kIdAx)
708 goto EmitX86Op;
709 break;
710
711 case InstDB::kEncodingX86Op_xDX_xAX:
712 if (isign3 == 0)
713 goto EmitX86Op;
714
715 if (isign3 == ENC_OPS2(Reg, Reg) && o0.id() == Gp::kIdDx && o1.id() == Gp::kIdAx)
716 goto EmitX86Op;
717 break;
718
719 case InstDB::kEncodingX86Op_MemZAX:
720 if (isign3 == 0)
721 goto EmitX86Op;
722
723 rmRel = &o0;
724 if (isign3 == ENC_OPS1(Mem) && x86IsImplicitMem(o0, Gp::kIdAx))
725 goto EmitX86OpImplicitMem;
726
727 break;
728
729 case InstDB::kEncodingX86I_xAX:
730 // Implicit form.
731 if (isign3 == ENC_OPS1(Imm)) {
732 immValue = o0.as<Imm>().valueAs<uint8_t>();
733 immSize = 1;
734 goto EmitX86Op;
735 }
736
737 // Explicit form.
738 if (isign3 == ENC_OPS2(Reg, Imm) && o0.id() == Gp::kIdAx) {
739 immValue = o1.as<Imm>().valueAs<uint8_t>();
740 immSize = 1;
741 goto EmitX86Op;
742 }
743 break;
744
745 case InstDB::kEncodingX86M:
746 opcode.addPrefixBySize(o0.size());
747 ASMJIT_FALLTHROUGH;
748
749 case InstDB::kEncodingX86M_NoSize:
750 rbReg = o0.id();
751 if (isign3 == ENC_OPS1(Reg))
752 goto EmitX86R;
753
754 rmRel = &o0;
755 if (isign3 == ENC_OPS1(Mem))
756 goto EmitX86M;
757 break;
758
759 case InstDB::kEncodingX86M_GPB_MulDiv:
760 CaseX86M_GPB_MulDiv:
761 // Explicit form?
762 if (isign3 > 0x7) {
763 // [AX] <- [AX] div|mul r8.
764 if (isign3 == ENC_OPS2(Reg, Reg)) {
765 if (ASMJIT_UNLIKELY(!Reg::isGpw(o0, Gp::kIdAx) || !Reg::isGpb(o1)))
766 goto InvalidInstruction;
767
768 rbReg = o1.id();
769 FIXUP_GPB(o1, rbReg);
770 goto EmitX86R;
771 }
772
773 // [AX] <- [AX] div|mul m8.
774 if (isign3 == ENC_OPS2(Reg, Mem)) {
775 if (ASMJIT_UNLIKELY(!Reg::isGpw(o0, Gp::kIdAx)))
776 goto InvalidInstruction;
777
778 rmRel = &o1;
779 goto EmitX86M;
780 }
781
782 // [?DX:?AX] <- [?DX:?AX] div|mul r16|r32|r64
783 if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
784 if (ASMJIT_UNLIKELY(o0.size() != o1.size()))
785 goto InvalidInstruction;
786
787 opcode.addArithBySize(o0.size());
788 rbReg = o2.id();
789 goto EmitX86R;
790 }
791
792 // [?DX:?AX] <- [?DX:?AX] div|mul m16|m32|m64
793 if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
794 if (ASMJIT_UNLIKELY(o0.size() != o1.size()))
795 goto InvalidInstruction;
796
797 opcode.addArithBySize(o0.size());
798 rmRel = &o2;
799 goto EmitX86M;
800 }
801
802 goto InvalidInstruction;
803 }
804
805 ASMJIT_FALLTHROUGH;
806
807 case InstDB::kEncodingX86M_GPB:
808 if (isign3 == ENC_OPS1(Reg)) {
809 opcode.addArithBySize(o0.size());
810 rbReg = o0.id();
811
812 if (o0.size() != 1)
813 goto EmitX86R;
814
815 FIXUP_GPB(o0, rbReg);
816 goto EmitX86R;
817 }
818
819 if (isign3 == ENC_OPS1(Mem)) {
820 if (ASMJIT_UNLIKELY(o0.size() == 0))
821 goto AmbiguousOperandSize;
822
823 opcode.addArithBySize(o0.size());
824 rmRel = &o0;
825 goto EmitX86M;
826 }
827 break;
828
829 case InstDB::kEncodingX86M_Only:
830 if (isign3 == ENC_OPS1(Mem)) {
831 rmRel = &o0;
832 goto EmitX86M;
833 }
834 break;
835
836 case InstDB::kEncodingX86M_Nop:
837 if (isign3 == ENC_OPS1(None))
838 goto EmitX86Op;
839
840 // Multi-byte NOP instruction "0F 1F /0".
841 opcode = Opcode::k000F00 | 0x1F;
842 opReg = 0;
843
844 if (isign3 == ENC_OPS1(Reg)) {
845 opcode.add66hBySize(o0.size());
846 rbReg = o0.id();
847 goto EmitX86R;
848 }
849
850 if (isign3 == ENC_OPS1(Mem)) {
851 opcode.add66hBySize(o0.size());
852 rmRel = &o0;
853 goto EmitX86M;
854 }
855 break;
856
857 case InstDB::kEncodingX86R_Native:
858 if (isign3 == ENC_OPS1(Reg)) {
859 rbReg = o0.id();
860 goto EmitX86R;
861 }
862 break;
863
864 case InstDB::kEncodingX86Rm:
865 opcode.addPrefixBySize(o0.size());
866 ASMJIT_FALLTHROUGH;
867
868 case InstDB::kEncodingX86Rm_NoSize:
869 if (isign3 == ENC_OPS2(Reg, Reg)) {
870 opReg = o0.id();
871 rbReg = o1.id();
872 goto EmitX86R;
873 }
874
875 if (isign3 == ENC_OPS2(Reg, Mem)) {
876 opReg = o0.id();
877 rmRel = &o1;
878 goto EmitX86M;
879 }
880 break;
881
    case InstDB::kEncodingX86Rm_Raw66H:
      // We normally emit either [66|F2|F3], this instruction requires 66+[F2|F3],
      // so the 66h prefix is written raw instead of being folded into PP bits.
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o0.id();
        rbReg = o1.id();

        if (o0.size() == 2)
          writer.emit8(0x66);
        else
          opcode.addWBySize(o0.size());
        goto EmitX86R;
      }

      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = o0.id();
        rmRel = &o1;

        if (o0.size() == 2)
          writer.emit8(0x66);
        else
          opcode.addWBySize(o0.size());
        goto EmitX86M;
      }
      break;
906
    case InstDB::kEncodingX86Mr:
      // Sized variant of the MR encoding; shares operand handling below.
      opcode.addPrefixBySize(o0.size());
      ASMJIT_FALLTHROUGH;

    case InstDB::kEncodingX86Mr_NoSize:
      // MR encoding: destination r/m is the first operand, reg the second.
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        rbReg = o0.id();
        opReg = o1.id();
        goto EmitX86R;
      }

      if (isign3 == ENC_OPS2(Mem, Reg)) {
        rmRel = &o0;
        opReg = o1.id();
        goto EmitX86M;
      }
      break;
924
    case InstDB::kEncodingX86Arith:
      // Classic ALU group (add/or/adc/sbb/and/sub/xor/cmp): base opcode from
      // the table, +2 selects the RM (reg <- r/m) direction.
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opcode += 2;
        opcode.addArithBySize(o0.size());

        if (o0.size() != o1.size())
          goto OperandSizeMismatch;

        opReg = o0.id();
        rbReg = o1.id();

        if (o0.size() == 1) {
          FIXUP_GPB(o0, opReg);
          FIXUP_GPB(o1, rbReg);

          // kOptionModMR forces the MR form (opcode without the +2 bias).
          if (!(options & Inst::kOptionModMR))
            goto EmitX86R;

          opcode -= 2;
          std::swap(opReg, rbReg);
          goto EmitX86R;
        }
        else {
          if (!(options & Inst::kOptionModMR))
            goto EmitX86R;

          opcode -= 2;
          std::swap(opReg, rbReg);
          goto EmitX86R;
        }
      }

      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opcode += 2;
        opcode.addArithBySize(o0.size());

        opReg = o0.id();
        rmRel = &o1;

        if (o0.size() != 1)
          goto EmitX86M;

        FIXUP_GPB(o0, opReg);
        goto EmitX86M;
      }

      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opcode.addArithBySize(o1.size());
        opReg = o1.id();
        rmRel = &o0;

        if (o1.size() != 1)
          goto EmitX86M;

        FIXUP_GPB(o1, opReg);
        goto EmitX86M;
      }

      // The remaining instructions use 0x80 opcode.
      opcode = 0x80;

      if (isign3 == ENC_OPS2(Reg, Imm)) {
        uint32_t size = o0.size();

        rbReg = o0.id();
        immValue = o1.as<Imm>().value();

        if (size == 1) {
          FIXUP_GPB(o0, rbReg);
          immSize = 1;
        }
        else {
          if (size == 2) {
            opcode |= Opcode::kPP_66;
          }
          else if (size == 4) {
            // Sign extend so isInt8 returns the right result.
            immValue = x86SignExtendI32<int64_t>(immValue);
          }
          else if (size == 8) {
            // AND with a zero-extended 32-bit immediate is equivalent to the
            // 64-bit operation, so it can be narrowed to the 32-bit form.
            bool canTransformTo32Bit = instId == Inst::kIdAnd && Support::isUInt32(immValue);

            if (!Support::isInt32(immValue)) {
              // We would do this by default when `kOptionOptimizedForSize` is
              // enabled, however, in this case we just force this as otherwise
              // we would have to fail.
              if (canTransformTo32Bit)
                size = 4;
              else
                goto InvalidImmediate;
            }
            else if (canTransformTo32Bit && hasEncodingOption(kEncodingOptionOptimizeForSize)) {
              size = 4;
            }

            opcode.addWBySize(size);
          }

          immSize = FastUInt8(Support::min<uint32_t>(size, 4));
          // Prefer the sign-extended imm8 form unless long-form is requested.
          if (Support::isInt8(immValue) && !(options & Inst::kOptionLongForm))
            immSize = 1;
        }

        // Short form - AL, AX, EAX, RAX.
        if (rbReg == 0 && (size == 1 || immSize != 1) && !(options & Inst::kOptionLongForm)) {
          opcode &= Opcode::kPP_66 | Opcode::kW;
          opcode |= ((opReg << 3) | (0x04 + (size != 1)));
          immSize = FastUInt8(Support::min<uint32_t>(size, 4));
          goto EmitX86Op;
        }

        // 0x80 (imm8 on byte), 0x81 (full imm), or 0x83 (imm8 sign-extended).
        opcode += size != 1 ? (immSize != 1 ? 1 : 3) : 0;
        goto EmitX86R;
      }

      if (isign3 == ENC_OPS2(Mem, Imm)) {
        uint32_t memSize = o0.size();

        if (ASMJIT_UNLIKELY(memSize == 0))
          goto AmbiguousOperandSize;

        immValue = o1.as<Imm>().value();
        immSize = FastUInt8(Support::min<uint32_t>(memSize, 4));

        // Sign extend so isInt8 returns the right result.
        if (memSize == 4)
          immValue = x86SignExtendI32<int64_t>(immValue);

        if (Support::isInt8(immValue) && !(options & Inst::kOptionLongForm))
          immSize = 1;

        opcode += memSize != 1 ? (immSize != 1 ? 1 : 3) : 0;
        opcode.addPrefixBySize(memSize);

        rmRel = &o0;
        goto EmitX86M;
      }
      break;
1063
    case InstDB::kEncodingX86Bswap:
      // BSWAP encodes the register in the opcode byte; 8-bit form is invalid.
      if (isign3 == ENC_OPS1(Reg)) {
        if (ASMJIT_UNLIKELY(o0.size() == 1))
          goto InvalidInstruction;

        opReg = o0.id();
        opcode.addPrefixBySize(o0.size());
        goto EmitX86OpReg;
      }
      break;
1074
    case InstDB::kEncodingX86Bt:
      // BT/BTC/BTR/BTS: the bit-index register goes to ModRM.reg.
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opcode.addPrefixBySize(o1.size());
        opReg = o1.id();
        rbReg = o0.id();
        goto EmitX86R;
      }

      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opcode.addPrefixBySize(o1.size());
        opReg = o1.id();
        rmRel = &o0;
        goto EmitX86M;
      }

      // The remaining instructions use the secondary opcode/r.
      immValue = o1.as<Imm>().value();
      immSize = 1;

      opcode = x86AltOpcodeOf(instInfo);
      opcode.addPrefixBySize(o0.size());
      opReg = opcode.extractO();

      if (isign3 == ENC_OPS2(Reg, Imm)) {
        rbReg = o0.id();
        goto EmitX86R;
      }

      if (isign3 == ENC_OPS2(Mem, Imm)) {
        if (ASMJIT_UNLIKELY(o0.size() == 0))
          goto AmbiguousOperandSize;

        rmRel = &o0;
        goto EmitX86M;
      }
      break;
1111
    case InstDB::kEncodingX86Call:
      // Indirect call through register (FF /2 via table opcode).
      if (isign3 == ENC_OPS1(Reg)) {
        rbReg = o0.id();
        goto EmitX86R;
      }

      // Indirect call through memory.
      rmRel = &o0;
      if (isign3 == ENC_OPS1(Mem))
        goto EmitX86M;

      // Call with 32-bit displacement use 0xE8 opcode. Call with 8-bit
      // displacement is not encodable so the alternative opcode field
      // in X86DB must be zero.
      opcode = 0xE8;
      opReg = 0;
      goto EmitJmpCall;
1128
1129 case InstDB::kEncodingX86Cmpxchg: {
1130 // Convert explicit to implicit.
1131 if (isign3 & (0x7 << 6)) {
1132 if (!Reg::isGp(o2) || o2.id() != Gp::kIdAx)
1133 goto InvalidInstruction;
1134 isign3 &= 0x3F;
1135 }
1136
1137 if (isign3 == ENC_OPS2(Reg, Reg)) {
1138 if (o0.size() != o1.size())
1139 goto OperandSizeMismatch;
1140
1141 opcode.addArithBySize(o0.size());
1142 rbReg = o0.id();
1143 opReg = o1.id();
1144
1145 if (o0.size() != 1)
1146 goto EmitX86R;
1147
1148 FIXUP_GPB(o0, rbReg);
1149 FIXUP_GPB(o1, opReg);
1150 goto EmitX86R;
1151 }
1152
1153 if (isign3 == ENC_OPS2(Mem, Reg)) {
1154 opcode.addArithBySize(o1.size());
1155 opReg = o1.id();
1156 rmRel = &o0;
1157
1158 if (o1.size() != 1)
1159 goto EmitX86M;
1160
1161 FIXUP_GPB(o0, opReg);
1162 goto EmitX86M;
1163 }
1164 break;
1165 }
1166
    case InstDB::kEncodingX86Cmpxchg8b_16b: {
      // CMPXCHG8B/16B m - the EDX:EAX and ECX:EBX register pairs are implicit
      // and may optionally be provided explicitly (operands 1..4).
      const Operand_& o3 = opExt[EmitterUtils::kOp3];
      const Operand_& o4 = opExt[EmitterUtils::kOp4];

      if (isign3 == ENC_OPS3(Mem, Reg, Reg)) {
        if (o3.isReg() && o4.isReg()) {
          rmRel = &o0;
          goto EmitX86M;
        }
      }

      if (isign3 == ENC_OPS1(Mem)) {
        rmRel = &o0;
        goto EmitX86M;
      }
      break;
    }
1184
    case InstDB::kEncodingX86Crc:
      // CRC32 r, r/m - destination register in ModRM.reg, REX.W by dst size.
      opReg = o0.id();
      opcode.addWBySize(o0.size());

      if (isign3 == ENC_OPS2(Reg, Reg)) {
        rbReg = o1.id();

        if (o1.size() == 1) {
          FIXUP_GPB(o1, rbReg);
          goto EmitX86R;
        }
        else {
          // This seems to be the only exception of encoding '66F2' prefix.
          if (o1.size() == 2) writer.emit8(0x66);

          // +1 selects the non-byte opcode form.
          opcode.add(1);
          goto EmitX86R;
        }
      }

      if (isign3 == ENC_OPS2(Reg, Mem)) {
        rmRel = &o1;
        if (o1.size() == 0)
          goto AmbiguousOperandSize;

        // This seems to be the only exception of encoding '66F2' prefix.
        if (o1.size() == 2) writer.emit8(0x66);

        opcode += o1.size() != 1;
        goto EmitX86M;
      }
      break;
1217
    case InstDB::kEncodingX86Enter:
      // ENTER iw, ib - 16-bit frame size followed by 8-bit nesting level,
      // packed into a single 3-byte immediate payload.
      if (isign3 == ENC_OPS2(Imm, Imm)) {
        uint32_t iw = o0.as<Imm>().valueAs<uint16_t>();
        uint32_t ib = o1.as<Imm>().valueAs<uint8_t>();

        immValue = iw | (ib << 16);
        immSize = 3;
        goto EmitX86Op;
      }
      break;
1228
1229 case InstDB::kEncodingX86Imul:
1230 // First process all forms distinct of `kEncodingX86M_OptB_MulDiv`.
1231 if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
1232 opcode = 0x6B;
1233 opcode.addPrefixBySize(o0.size());
1234
1235 immValue = o2.as<Imm>().value();
1236 immSize = 1;
1237
1238 if (!Support::isInt8(immValue) || (options & Inst::kOptionLongForm)) {
1239 opcode -= 2;
1240 immSize = o0.size() == 2 ? 2 : 4;
1241 }
1242
1243 opReg = o0.id();
1244 rbReg = o1.id();
1245
1246 goto EmitX86R;
1247 }
1248
1249 if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
1250 opcode = 0x6B;
1251 opcode.addPrefixBySize(o0.size());
1252
1253 immValue = o2.as<Imm>().value();
1254 immSize = 1;
1255
1256 // Sign extend so isInt8 returns the right result.
1257 if (o0.size() == 4)
1258 immValue = x86SignExtendI32<int64_t>(immValue);
1259
1260 if (!Support::isInt8(immValue) || (options & Inst::kOptionLongForm)) {
1261 opcode -= 2;
1262 immSize = o0.size() == 2 ? 2 : 4;
1263 }
1264
1265 opReg = o0.id();
1266 rmRel = &o1;
1267
1268 goto EmitX86M;
1269 }
1270
1271 if (isign3 == ENC_OPS2(Reg, Reg)) {
1272 // Must be explicit 'ax, r8' form.
1273 if (o1.size() == 1)
1274 goto CaseX86M_GPB_MulDiv;
1275
1276 if (o0.size() != o1.size())
1277 goto OperandSizeMismatch;
1278
1279 opReg = o0.id();
1280 rbReg = o1.id();
1281
1282 opcode = Opcode::k000F00 | 0xAF;
1283 opcode.addPrefixBySize(o0.size());
1284 goto EmitX86R;
1285 }
1286
1287 if (isign3 == ENC_OPS2(Reg, Mem)) {
1288 // Must be explicit 'ax, m8' form.
1289 if (o1.size() == 1)
1290 goto CaseX86M_GPB_MulDiv;
1291
1292 opReg = o0.id();
1293 rmRel = &o1;
1294
1295 opcode = Opcode::k000F00 | 0xAF;
1296 opcode.addPrefixBySize(o0.size());
1297 goto EmitX86M;
1298 }
1299
1300 // Shorthand to imul 'reg, reg, imm'.
1301 if (isign3 == ENC_OPS2(Reg, Imm)) {
1302 opcode = 0x6B;
1303 opcode.addPrefixBySize(o0.size());
1304
1305 immValue = o1.as<Imm>().value();
1306 immSize = 1;
1307
1308 // Sign extend so isInt8 returns the right result.
1309 if (o0.size() == 4)
1310 immValue = x86SignExtendI32<int64_t>(immValue);
1311
1312 if (!Support::isInt8(immValue) || (options & Inst::kOptionLongForm)) {
1313 opcode -= 2;
1314 immSize = o0.size() == 2 ? 2 : 4;
1315 }
1316
1317 opReg = rbReg = o0.id();
1318 goto EmitX86R;
1319 }
1320
1321 // Try implicit form.
1322 goto CaseX86M_GPB_MulDiv;
1323
    case InstDB::kEncodingX86In:
      // IN al|ax|eax, imm8 - alternative (secondary) opcode, port immediate.
      if (isign3 == ENC_OPS2(Reg, Imm)) {
        if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdAx))
          goto InvalidInstruction;

        immValue = o1.as<Imm>().valueAs<uint8_t>();
        immSize = 1;

        opcode = x86AltOpcodeOf(instInfo) + (o0.size() != 1);
        opcode.add66hBySize(o0.size());
        goto EmitX86Op;
      }

      // IN al|ax|eax, dx - both registers are implicit/fixed.
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdAx || o1.id() != Gp::kIdDx))
          goto InvalidInstruction;

        opcode += o0.size() != 1;
        opcode.add66hBySize(o0.size());
        goto EmitX86Op;
      }
      break;
1346
    case InstDB::kEncodingX86Ins:
      // INS [es:di|edi|rdi], dx - implicit memory destination and DX port.
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        if (ASMJIT_UNLIKELY(!x86IsImplicitMem(o0, Gp::kIdDi) || o1.id() != Gp::kIdDx))
          goto InvalidInstruction;

        uint32_t size = o0.size();
        if (ASMJIT_UNLIKELY(size == 0))
          goto AmbiguousOperandSize;

        rmRel = &o0;
        opcode += (size != 1);

        opcode.add66hBySize(size);
        goto EmitX86OpImplicitMem;
      }
      break;
1363
    case InstDB::kEncodingX86IncDec:
      if (isign3 == ENC_OPS1(Reg)) {
        rbReg = o0.id();

        if (o0.size() == 1) {
          FIXUP_GPB(o0, rbReg);
          goto EmitX86R;
        }

        if (is32Bit()) {
          // INC r16|r32 is only encodable in 32-bit mode (collides with REX).
          opcode = x86AltOpcodeOf(instInfo) + (rbReg & 0x07);
          opcode.add66hBySize(o0.size());
          goto EmitX86Op;
        }
        else {
          opcode.addArithBySize(o0.size());
          goto EmitX86R;
        }
      }

      if (isign3 == ENC_OPS1(Mem)) {
        opcode.addArithBySize(o0.size());
        rmRel = &o0;
        goto EmitX86M;
      }
      break;
1391
    case InstDB::kEncodingX86Int:
      // INT imm8 - single byte interrupt vector.
      if (isign3 == ENC_OPS1(Imm)) {
        immValue = o0.as<Imm>().value();
        immSize = 1;
        goto EmitX86Op;
      }
      break;
1399
    case InstDB::kEncodingX86Jcc:
      // Optional branch-hint prefixes: 3Eh (taken) / 2Eh (not-taken).
      if ((options & (Inst::kOptionTaken | Inst::kOptionNotTaken)) && hasEncodingOption(kEncodingOptionPredictedJumps)) {
        uint8_t prefix = (options & Inst::kOptionTaken) ? uint8_t(0x3E) : uint8_t(0x2E);
        writer.emit8(prefix);
      }

      rmRel = &o0;
      opReg = 0;
      goto EmitJmpCall;
1409
    case InstDB::kEncodingX86JecxzLoop:
      rmRel = &o0;
      // Explicit jecxz|loop [r|e]cx, dst
      if (o0.isReg()) {
        if (ASMJIT_UNLIKELY(!Reg::isGp(o0, Gp::kIdCx)))
          goto InvalidInstruction;

        // 67h address-size override selects CX/ECX instead of the native one.
        writer.emitAddressOverride((is32Bit() && o0.size() == 2) || (is64Bit() && o0.size() == 4));
        rmRel = &o1;
      }

      opReg = 0;
      goto EmitJmpCall;
1423
    case InstDB::kEncodingX86Jmp:
      // Indirect jump through register.
      if (isign3 == ENC_OPS1(Reg)) {
        rbReg = o0.id();
        goto EmitX86R;
      }

      // Indirect jump through memory.
      rmRel = &o0;
      if (isign3 == ENC_OPS1(Mem))
        goto EmitX86M;

      // Jump encoded with 32-bit displacement use 0xE9 opcode. Jump encoded
      // with 8-bit displacement's opcode is stored as an alternative opcode.
      opcode = 0xE9;
      opReg = 0;
      goto EmitJmpCall;

    case InstDB::kEncodingX86JmpRel:
      // Relative-only jump (e.g. XBEGIN) - opcode comes from the table.
      rmRel = &o0;
      goto EmitJmpCall;
1443
    case InstDB::kEncodingX86Lea:
      // LEA r, m - address calculation only; memory size is irrelevant.
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opcode.addPrefixBySize(o0.size());
        opReg = o0.id();
        rmRel = &o1;
        goto EmitX86M;
      }
      break;
1452
    case InstDB::kEncodingX86Mov:
      // MOV has many forms: GP<->GP, GP<->SReg/CReg/DReg, GP<->Mem, and
      // immediate forms, each with its own opcode.
      // Reg <- Reg
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o0.id();
        rbReg = o1.id();

        // Asmjit uses segment registers indexed from 1 to 6, leaving zero as
        // "no segment register used". We have to fix this (decrement the index
        // of the register) when emitting MOV instructions which move to/from
        // a segment register. The segment register is always `opReg`, because
        // the MOV instruction uses either RM or MR encoding.

        // GP <- ??
        if (Reg::isGp(o0)) {
          // GP <- GP
          if (Reg::isGp(o1)) {
            uint32_t size0 = o0.size();
            uint32_t size1 = o1.size();

            if (size0 != size1) {
              // We allow 'mov r64, r32' as it's basically zero-extend.
              if (size0 == 8 && size1 == 4)
                size0 = 4; // Zero extend, don't promote to 64-bit.
              else
                goto InvalidInstruction;
            }

            if (size0 == 1) {
              FIXUP_GPB(o0, opReg);
              FIXUP_GPB(o1, rbReg);
              opcode = 0x8A;

              // kOptionModMR selects the 0x88 (MR) encoding instead.
              if (!(options & Inst::kOptionModMR))
                goto EmitX86R;

              opcode -= 2;
              std::swap(opReg, rbReg);
              goto EmitX86R;
            }
            else {
              opcode = 0x8B;
              opcode.addPrefixBySize(size0);

              if (!(options & Inst::kOptionModMR))
                goto EmitX86R;

              opcode -= 2;
              std::swap(opReg, rbReg);
              goto EmitX86R;
            }
          }

          // Special register source - it goes to ModRM.reg, GP to ModRM.rm.
          opReg = rbReg;
          rbReg = o0.id();

          // GP <- SReg
          if (Reg::isSReg(o1)) {
            opcode = 0x8C;
            opcode.addPrefixBySize(o0.size());
            opReg--;
            goto EmitX86R;
          }

          // GP <- CReg
          if (Reg::isCReg(o1)) {
            opcode = Opcode::k000F00 | 0x20;

            // Use `LOCK MOV` in 32-bit mode if CR8+ register is accessed (AMD extension).
            if ((opReg & 0x8) && is32Bit()) {
              writer.emit8(0xF0);
              opReg &= 0x7;
            }
            goto EmitX86R;
          }

          // GP <- DReg
          if (Reg::isDReg(o1)) {
            opcode = Opcode::k000F00 | 0x21;
            goto EmitX86R;
          }
        }
        else {
          // ?? <- GP
          if (!Reg::isGp(o1))
            goto InvalidInstruction;

          // SReg <- GP
          if (Reg::isSReg(o0)) {
            opcode = 0x8E;
            opcode.addPrefixBySize(o1.size());
            opReg--;
            goto EmitX86R;
          }

          // CReg <- GP
          if (Reg::isCReg(o0)) {
            opcode = Opcode::k000F00 | 0x22;

            // Use `LOCK MOV` in 32-bit mode if CR8+ register is accessed (AMD extension).
            if ((opReg & 0x8) && is32Bit()) {
              writer.emit8(0xF0);
              opReg &= 0x7;
            }
            goto EmitX86R;
          }

          // DReg <- GP
          if (Reg::isDReg(o0)) {
            opcode = Opcode::k000F00 | 0x23;
            goto EmitX86R;
          }
        }

        goto InvalidInstruction;
      }

      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = o0.id();
        rmRel = &o1;

        // SReg <- Mem
        if (Reg::isSReg(o0)) {
          opcode = 0x8E;
          opcode.addPrefixBySize(o1.size());
          opReg--;
          goto EmitX86M;
        }
        // Reg <- Mem
        else {
          opcode = 0;
          opcode.addArithBySize(o0.size());

          if (o0.size() == 1)
            FIXUP_GPB(o0, opReg);

          // Handle a special form of `mov al|ax|eax|rax, [ptr64]` that doesn't use MOD.
          if (opReg == Gp::kIdAx && !rmRel->as<Mem>().hasBaseOrIndex()) {
            immValue = rmRel->as<Mem>().offset();
            if (x86GetMovAbsAddrType(this, writer, o0.size(), options, rmRel->as<Mem>()) == BaseMem::kAddrTypeAbs) {
              opcode += 0xA0;
              goto EmitX86OpMovAbs;
            }
          }

          opcode += 0x8A;
          goto EmitX86M;
        }
      }

      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opReg = o1.id();
        rmRel = &o0;

        // Mem <- SReg
        if (Reg::isSReg(o1)) {
          opcode = 0x8C;
          opcode.addPrefixBySize(o0.size());
          opReg--;
          goto EmitX86M;
        }
        // Mem <- Reg
        else {
          opcode = 0;
          opcode.addArithBySize(o1.size());

          if (o1.size() == 1)
            FIXUP_GPB(o1, opReg);

          // Handle a special form of `mov [ptr64], al|ax|eax|rax` that doesn't use MOD.
          if (opReg == Gp::kIdAx && !rmRel->as<Mem>().hasBaseOrIndex()) {
            immValue = rmRel->as<Mem>().offset();
            if (x86GetMovAbsAddrType(this, writer, o1.size(), options, rmRel->as<Mem>()) == BaseMem::kAddrTypeAbs) {
              opcode += 0xA2;
              goto EmitX86OpMovAbs;
            }
          }

          opcode += 0x88;
          goto EmitX86M;
        }
      }

      if (isign3 == ENC_OPS2(Reg, Imm)) {
        opReg = o0.id();
        immSize = FastUInt8(o0.size());

        if (immSize == 1) {
          FIXUP_GPB(o0, opReg);

          // MOV r8, imm8 encodes the register in the opcode (B0+r).
          opcode = 0xB0;
          immValue = o1.as<Imm>().valueAs<uint8_t>();
          goto EmitX86OpReg;
        }
        else {
          // 64-bit immediate in 64-bit mode is allowed.
          immValue = o1.as<Imm>().value();

          // Optimize the instruction size by using a 32-bit immediate if possible.
          if (immSize == 8 && !(options & Inst::kOptionLongForm)) {
            if (Support::isUInt32(immValue) && hasEncodingOption(kEncodingOptionOptimizeForSize)) {
              // Zero-extend by using a 32-bit GPD destination instead of a 64-bit GPQ.
              immSize = 4;
            }
            else if (Support::isInt32(immValue)) {
              // Sign-extend, uses 'C7 /0' opcode.
              rbReg = opReg;

              opcode = Opcode::kW | 0xC7;
              opReg = 0;

              immSize = 4;
              goto EmitX86R;
            }
          }

          // MOV r, imm encodes the register in the opcode (B8+r).
          opcode = 0xB8;
          opcode.addPrefixBySize(immSize);
          goto EmitX86OpReg;
        }
      }

      if (isign3 == ENC_OPS2(Mem, Imm)) {
        uint32_t memSize = o0.size();
        if (ASMJIT_UNLIKELY(memSize == 0))
          goto AmbiguousOperandSize;

        // C6 (byte) / C7 (word/dword/qword), immediate capped at 32 bits.
        opcode = 0xC6 + (memSize != 1);
        opcode.addPrefixBySize(memSize);
        opReg = 0;
        rmRel = &o0;

        immValue = o1.as<Imm>().value();
        immSize = FastUInt8(Support::min<uint32_t>(memSize, 4));
        goto EmitX86M;
      }
      break;
1689
    case InstDB::kEncodingX86MovsxMovzx:
      // MOVSX/MOVZX r, r/m8|r/m16 - +1 selects the 16-bit source form.
      opcode.add(o1.size() != 1);
      opcode.addPrefixBySize(o0.size());

      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o0.id();
        rbReg = o1.id();

        if (o1.size() != 1)
          goto EmitX86R;

        FIXUP_GPB(o1, rbReg);
        goto EmitX86R;
      }

      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = o0.id();
        rmRel = &o1;
        goto EmitX86M;
      }
      break;
1711
    case InstDB::kEncodingX86MovntiMovdiri:
      // MOVNTI/MOVDIRI m, r - MR encoding, REX.W for 64-bit register source.
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opcode.addWIf(Reg::isGpq(o1));

        opReg = o1.id();
        rmRel = &o0;
        goto EmitX86M;
      }
      break;
1721
    case InstDB::kEncodingX86EnqcmdMovdir64b:
      if (isign3 == ENC_OPS2(Mem, Mem)) {
        const Mem& m0 = o0.as<Mem>();
        // This is the only required validation, the rest is handled afterwards.
        if (ASMJIT_UNLIKELY(m0.baseType() != o1.as<Mem>().baseType() ||
                            m0.hasIndex() ||
                            m0.hasOffset() ||
                            (m0.hasSegment() && m0.segmentId() != SReg::kIdEs)))
          goto InvalidInstruction;

        // The first memory operand is passed via register, the second memory operand is RM.
        opReg = o0.as<Mem>().baseId();
        rmRel = &o1;
        goto EmitX86M;
      }
      break;
1738
    case InstDB::kEncodingX86Out:
      // OUT imm8, al|ax|eax - secondary opcode, port immediate.
      if (isign3 == ENC_OPS2(Imm, Reg)) {
        if (ASMJIT_UNLIKELY(o1.id() != Gp::kIdAx))
          goto InvalidInstruction;

        opcode = x86AltOpcodeOf(instInfo) + (o1.size() != 1);
        opcode.add66hBySize(o1.size());

        immValue = o0.as<Imm>().valueAs<uint8_t>();
        immSize = 1;
        goto EmitX86Op;
      }

      // OUT dx, al|ax|eax - both registers are implicit/fixed.
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdDx || o1.id() != Gp::kIdAx))
          goto InvalidInstruction;

        opcode.add(o1.size() != 1);
        opcode.add66hBySize(o1.size());
        goto EmitX86Op;
      }
      break;
1761
    case InstDB::kEncodingX86Outs:
      // OUTS dx, [ds:si|esi|rsi] - implicit DX port and memory source.
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdDx || !x86IsImplicitMem(o1, Gp::kIdSi)))
          goto InvalidInstruction;

        uint32_t size = o1.size();
        if (ASMJIT_UNLIKELY(size == 0))
          goto AmbiguousOperandSize;

        rmRel = &o1;
        opcode.add(size != 1);
        opcode.add66hBySize(size);
        goto EmitX86OpImplicitMem;
      }
      break;
1777
1778 case InstDB::kEncodingX86Push:
1779 if (isign3 == ENC_OPS1(Reg)) {
1780 if (Reg::isSReg(o0)) {
1781 uint32_t segment = o0.id();
1782 if (ASMJIT_UNLIKELY(segment >= SReg::kIdCount))
1783 goto InvalidSegment;
1784
1785 opcode = x86OpcodePushSReg[segment];
1786 goto EmitX86Op;
1787 }
1788 else {
1789 goto CaseX86PushPop_Gp;
1790 }
1791 }
1792
1793 if (isign3 == ENC_OPS1(Imm)) {
1794 immValue = o0.as<Imm>().value();
1795 immSize = 4;
1796
1797 if (Support::isInt8(immValue) && !(options & Inst::kOptionLongForm))
1798 immSize = 1;
1799
1800 opcode = immSize == 1 ? 0x6A : 0x68;
1801 goto EmitX86Op;
1802 }
1803 ASMJIT_FALLTHROUGH;
1804
1805 case InstDB::kEncodingX86Pop:
1806 if (isign3 == ENC_OPS1(Reg)) {
1807 if (Reg::isSReg(o0)) {
1808 uint32_t segment = o0.id();
1809 if (ASMJIT_UNLIKELY(segment == SReg::kIdCs || segment >= SReg::kIdCount))
1810 goto InvalidSegment;
1811
1812 opcode = x86OpcodePopSReg[segment];
1813 goto EmitDone;
1814 }
1815 else {
1816 CaseX86PushPop_Gp:
1817 // We allow 2 byte, 4 byte, and 8 byte register sizes, although PUSH
1818 // and POP only allow 2 bytes or native size. On 64-bit we simply
1819 // PUSH/POP 64-bit register even if 32-bit register was given.
1820 if (ASMJIT_UNLIKELY(o0.size() < 2))
1821 goto InvalidInstruction;
1822
1823 opcode = x86AltOpcodeOf(instInfo);
1824 opcode.add66hBySize(o0.size());
1825 opReg = o0.id();
1826 goto EmitX86OpReg;
1827 }
1828 }
1829
1830 if (isign3 == ENC_OPS1(Mem)) {
1831 if (ASMJIT_UNLIKELY(o0.size() == 0))
1832 goto AmbiguousOperandSize;
1833
1834 if (ASMJIT_UNLIKELY(o0.size() != 2 && o0.size() != registerSize()))
1835 goto InvalidInstruction;
1836
1837 opcode.add66hBySize(o0.size());
1838 rmRel = &o0;
1839 goto EmitX86M;
1840 }
1841 break;
1842
    case InstDB::kEncodingX86Ret:
      if (isign3 == 0) {
        // 'ret' without immediate, change C2 to C3.
        opcode.add(1);
        goto EmitX86Op;
      }

      if (isign3 == ENC_OPS1(Imm)) {
        immValue = o0.as<Imm>().value();
        if (immValue == 0 && !(options & Inst::kOptionLongForm)) {
          // 'ret' without immediate, change C2 to C3.
          opcode.add(1);
          goto EmitX86Op;
        }
        else {
          // 'ret imm16' - pops imm16 bytes after returning.
          immSize = 2;
          goto EmitX86Op;
        }
      }
      break;
1863
    case InstDB::kEncodingX86Rot:
      // Rotate/shift group (rol/ror/rcl/rcr/shl/shr/sar): base opcode is the
      // shift-by-1 form (D0/D1); +2 selects shift-by-CL; -0x10 the imm8 form.
      if (o0.isReg()) {
        opcode.addArithBySize(o0.size());
        rbReg = o0.id();

        if (o0.size() == 1)
          FIXUP_GPB(o0, rbReg);

        if (isign3 == ENC_OPS2(Reg, Reg)) {
          // Variable count must be in CL.
          if (ASMJIT_UNLIKELY(o1.id() != Gp::kIdCx))
            goto InvalidInstruction;

          opcode += 2;
          goto EmitX86R;
        }

        if (isign3 == ENC_OPS2(Reg, Imm)) {
          immValue = o1.as<Imm>().value() & 0xFF;
          immSize = 0;

          // Count of 1 uses the shorter D0/D1 encoding with no immediate.
          if (immValue == 1 && !(options & Inst::kOptionLongForm))
            goto EmitX86R;

          opcode -= 0x10;
          immSize = 1;
          goto EmitX86R;
        }
      }
      else {
        opcode.addArithBySize(o0.size());

        if (isign3 == ENC_OPS2(Mem, Reg)) {
          if (ASMJIT_UNLIKELY(o1.id() != Gp::kIdCx))
            goto InvalidInstruction;

          opcode += 2;
          rmRel = &o0;
          goto EmitX86M;
        }

        if (isign3 == ENC_OPS2(Mem, Imm)) {
          if (ASMJIT_UNLIKELY(o0.size() == 0))
            goto AmbiguousOperandSize;

          rmRel = &o0;
          immValue = o1.as<Imm>().value() & 0xFF;
          immSize = 0;

          if (immValue == 1 && !(options & Inst::kOptionLongForm))
            goto EmitX86M;

          opcode -= 0x10;
          immSize = 1;
          goto EmitX86M;
        }
      }
      break;
1921
    case InstDB::kEncodingX86Set:
      // SETcc r/m8 - destination is always a byte.
      if (isign3 == ENC_OPS1(Reg)) {
        rbReg = o0.id();
        FIXUP_GPB(o0, rbReg);
        goto EmitX86R;
      }

      if (isign3 == ENC_OPS1(Mem)) {
        rmRel = &o0;
        goto EmitX86M;
      }
      break;
1934
    case InstDB::kEncodingX86ShldShrd:
      // SHLD/SHRD r/m, r, imm8|cl - the source register goes to ModRM.reg.
      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
        opcode.addPrefixBySize(o0.size());
        opReg = o1.id();
        rbReg = o0.id();

        immValue = o2.as<Imm>().value();
        immSize = 1;
        goto EmitX86R;
      }

      if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
        opcode.addPrefixBySize(o1.size());
        opReg = o1.id();
        rmRel = &o0;

        immValue = o2.as<Imm>().value();
        immSize = 1;
        goto EmitX86M;
      }

      // The following instructions use opcode + 1.
      opcode.add(1);

      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
        // Variable count must be in CL.
        if (ASMJIT_UNLIKELY(o2.id() != Gp::kIdCx))
          goto InvalidInstruction;

        opcode.addPrefixBySize(o0.size());
        opReg = o1.id();
        rbReg = o0.id();
        goto EmitX86R;
      }

      if (isign3 == ENC_OPS3(Mem, Reg, Reg)) {
        if (ASMJIT_UNLIKELY(o2.id() != Gp::kIdCx))
          goto InvalidInstruction;

        opcode.addPrefixBySize(o1.size());
        opReg = o1.id();
        rmRel = &o0;
        goto EmitX86M;
      }
      break;
1979
    case InstDB::kEncodingX86StrRm:
      // String instruction (e.g. LODS): accumulator + implicit [si|esi|rsi]
      // source; the memory operand must have no displacement.
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        rmRel = &o1;
        if (ASMJIT_UNLIKELY(rmRel->as<Mem>().offsetLo32() || !Reg::isGp(o0.as<Reg>(), Gp::kIdAx)))
          goto InvalidInstruction;

        uint32_t size = o0.size();
        if (o1.hasSize() && ASMJIT_UNLIKELY(o1.size() != size))
          goto OperandSizeMismatch;

        opcode.addArithBySize(size);
        goto EmitX86OpImplicitMem;
      }
      break;
1994
    case InstDB::kEncodingX86StrMr:
      // String instruction (e.g. STOS/SCAS): implicit [di|edi|rdi] memory
      // destination + accumulator; no displacement allowed.
      if (isign3 == ENC_OPS2(Mem, Reg)) {
        rmRel = &o0;
        if (ASMJIT_UNLIKELY(rmRel->as<Mem>().offsetLo32() || !Reg::isGp(o1.as<Reg>(), Gp::kIdAx)))
          goto InvalidInstruction;

        uint32_t size = o1.size();
        if (o0.hasSize() && ASMJIT_UNLIKELY(o0.size() != size))
          goto OperandSizeMismatch;

        opcode.addArithBySize(size);
        goto EmitX86OpImplicitMem;
      }
      break;
2009
    case InstDB::kEncodingX86StrMm:
      // String instruction with two implicit memory operands (e.g. MOVS/CMPS);
      // both must use the same base/index addressing and have no displacement.
      if (isign3 == ENC_OPS2(Mem, Mem)) {
        if (ASMJIT_UNLIKELY(o0.as<Mem>().baseAndIndexTypes() !=
                            o1.as<Mem>().baseAndIndexTypes()))
          goto InvalidInstruction;

        rmRel = &o1;
        if (ASMJIT_UNLIKELY(o0.as<Mem>().hasOffset()))
          goto InvalidInstruction;

        uint32_t size = o1.size();
        if (ASMJIT_UNLIKELY(size == 0))
          goto AmbiguousOperandSize;

        if (ASMJIT_UNLIKELY(o0.size() != size))
          goto OperandSizeMismatch;

        opcode.addArithBySize(size);
        goto EmitX86OpImplicitMem;
      }
      break;
2031
    case InstDB::kEncodingX86Test:
      // TEST r/m, r (84/85) and TEST r/m, imm (F6/F7 via secondary opcode).
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        if (o0.size() != o1.size())
          goto OperandSizeMismatch;

        opcode.addArithBySize(o0.size());
        rbReg = o0.id();
        opReg = o1.id();

        if (o0.size() != 1)
          goto EmitX86R;

        FIXUP_GPB(o0, rbReg);
        FIXUP_GPB(o1, opReg);
        goto EmitX86R;
      }

      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opcode.addArithBySize(o1.size());
        opReg = o1.id();
        rmRel = &o0;

        if (o1.size() != 1)
          goto EmitX86M;

        FIXUP_GPB(o1, opReg);
        goto EmitX86M;
      }

      // The following instructions use the secondary opcode.
      opcode = x86AltOpcodeOf(instInfo);
      opReg = opcode.extractO();

      if (isign3 == ENC_OPS2(Reg, Imm)) {
        opcode.addArithBySize(o0.size());
        rbReg = o0.id();

        if (o0.size() == 1) {
          FIXUP_GPB(o0, rbReg);
          immValue = o1.as<Imm>().valueAs<uint8_t>();
          immSize = 1;
        }
        else {
          immValue = o1.as<Imm>().value();
          immSize = FastUInt8(Support::min<uint32_t>(o0.size(), 4));
        }

        // Short form - AL, AX, EAX, RAX.
        if (rbReg == 0 && !(options & Inst::kOptionLongForm)) {
          opcode &= Opcode::kPP_66 | Opcode::kW;
          opcode |= 0xA8 + (o0.size() != 1);
          goto EmitX86Op;
        }

        goto EmitX86R;
      }

      if (isign3 == ENC_OPS2(Mem, Imm)) {
        if (ASMJIT_UNLIKELY(o0.size() == 0))
          goto AmbiguousOperandSize;

        opcode.addArithBySize(o0.size());
        rmRel = &o0;

        immValue = o1.as<Imm>().value();
        immSize = FastUInt8(Support::min<uint32_t>(o0.size(), 4));
        goto EmitX86M;
      }
      break;
2101
    case InstDB::kEncodingX86Xchg:
      // XCHG reg, mem - then shares reg/reg and mem/reg handling with XADD.
      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opcode.addArithBySize(o0.size());
        opReg = o0.id();
        rmRel = &o1;

        if (o0.size() != 1)
          goto EmitX86M;

        FIXUP_GPB(o0, opReg);
        goto EmitX86M;
      }
      ASMJIT_FALLTHROUGH;

    case InstDB::kEncodingX86Xadd:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opcode.addArithBySize(o0.size());
        rbReg = o0.id();
        opReg = o1.id();

        if (o0.size() != o1.size())
          goto OperandSizeMismatch;

        if (o0.size() == 1) {
          FIXUP_GPB(o0, rbReg);
          FIXUP_GPB(o1, opReg);
          goto EmitX86R;
        }

        // Special opcode for 'xchg ?ax, reg'.
        if (instId == Inst::kIdXchg && (opReg == 0 || rbReg == 0)) {
          opcode &= Opcode::kPP_66 | Opcode::kW;
          opcode |= 0x90;
          // One of `xchg a, b` or `xchg b, a` is AX/EAX/RAX.
          opReg += rbReg;
          goto EmitX86OpReg;
        }
        else {
          goto EmitX86R;
        }
      }

      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opcode.addArithBySize(o1.size());
        opReg = o1.id();
        rmRel = &o0;

        if (o1.size() == 1) {
          FIXUP_GPB(o1, opReg);
        }

        goto EmitX86M;
      }
      break;
2156
    case InstDB::kEncodingX86Fence:
      // LFENCE/MFENCE/SFENCE - no operands; ModRM.rm is fixed to zero.
      rbReg = 0;
      goto EmitX86R;
2160
    case InstDB::kEncodingX86Bndmov:
      // BNDMOV - RM by default, the secondary opcode provides the MR form.
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o0.id();
        rbReg = o1.id();

        // ModRM encoding:
        if (!(options & Inst::kOptionModMR))
          goto EmitX86R;

        // ModMR encoding:
        opcode = x86AltOpcodeOf(instInfo);
        std::swap(opReg, rbReg);
        goto EmitX86R;
      }

      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = o0.id();
        rmRel = &o1;
        goto EmitX86M;
      }

      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opcode = x86AltOpcodeOf(instInfo);

        rmRel = &o0;
        opReg = o1.id();
        goto EmitX86M;
      }
      break;
2190
2191 // ------------------------------------------------------------------------
2192 // [FPU]
2193 // ------------------------------------------------------------------------
2194
    case InstDB::kEncodingFpuOp:
      // No-operand FPU instruction - opcode comes directly from the table.
      goto EmitFpuOp;
2197
    case InstDB::kEncodingFpuArith:
      // FPU arithmetic: 'fxxx st0, sti' uses the D8 family, 'fxxx sti, st0'
      // the DC family; the second opcode byte is packed in the opcode field.
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o0.id();
        rbReg = o1.id();

        // We switch to the alternative opcode if the first operand is zero.
        if (opReg == 0) {
CaseFpuArith_Reg:
          opcode = ((0xD8 << Opcode::kFPU_2B_Shift) ) +
                   ((opcode >> Opcode::kFPU_2B_Shift) & 0xFF) + rbReg;
          goto EmitFpuOp;
        }
        else if (rbReg == 0) {
          rbReg = opReg;
          opcode = ((0xDC << Opcode::kFPU_2B_Shift) ) +
                   ((opcode ) & 0xFF) + rbReg;
          goto EmitFpuOp;
        }
        else {
          // One of the operands must be st(0).
          goto InvalidInstruction;
        }
      }

      if (isign3 == ENC_OPS1(Mem)) {
CaseFpuArith_Mem:
        // 0xD8/0xDC, depends on the size of the memory operand; opReg is valid.
        opcode = (o0.size() == 4) ? 0xD8 : 0xDC;
        // Clear compressed displacement before going to EmitX86M.
        opcode &= ~uint32_t(Opcode::kCDSHL_Mask);

        rmRel = &o0;
        goto EmitX86M;
      }
      break;
2232
2233 // FPU compare (fcom/fcomp) - reuses the FpuArith register/memory emit paths.
2233 case InstDB::kEncodingFpuCom:
2234 if (isign3 == 0) {
2235 // No operands - the implicit second operand defaults to st(1).
2235 rbReg = 1;
2236 goto CaseFpuArith_Reg;
2237 }
2238
2239 if (isign3 == ENC_OPS1(Reg)) {
2240 rbReg = o0.id();
2241 goto CaseFpuArith_Reg;
2242 }
2243
2244 if (isign3 == ENC_OPS1(Mem)) {
2245 goto CaseFpuArith_Mem;
2246 }
2247 break;
2248
2249 // FLD/FST/FSTP - memory form selected by operand size (m32/m64/m80), register
2249 // form uses hard-coded two-byte opcodes.
2249 case InstDB::kEncodingFpuFldFst:
2250 if (isign3 == ENC_OPS1(Mem)) {
2251 rmRel = &o0;
2252
2253 // m32 form - base opcode as-is (only if the instruction supports it).
2253 if (o0.size() == 4 && commonInfo->hasFlag(InstDB::kFlagFpuM32)) {
2254 goto EmitX86M;
2255 }
2256
2257 // m64 form - base opcode adjusted by +4.
2257 if (o0.size() == 8 && commonInfo->hasFlag(InstDB::kFlagFpuM64)) {
2258 opcode += 4;
2259 goto EmitX86M;
2260 }
2261
2262 // m80 form - uses the secondary opcode; /N extension comes from its O field.
2262 if (o0.size() == 10 && commonInfo->hasFlag(InstDB::kFlagFpuM80)) {
2263 opcode = x86AltOpcodeOf(instInfo);
2264 opReg = opcode.extractO();
2265 goto EmitX86M;
2266 }
2267 }
2268
2269 // Register form 'st(i)' - fixed two-byte opcodes per instruction.
2269 if (isign3 == ENC_OPS1(Reg)) {
2270 if (instId == Inst::kIdFld ) { opcode = (0xD9 << Opcode::kFPU_2B_Shift) + 0xC0 + o0.id(); goto EmitFpuOp; }
2271 if (instId == Inst::kIdFst ) { opcode = (0xDD << Opcode::kFPU_2B_Shift) + 0xD0 + o0.id(); goto EmitFpuOp; }
2272 if (instId == Inst::kIdFstp) { opcode = (0xDD << Opcode::kFPU_2B_Shift) + 0xD8 + o0.id(); goto EmitFpuOp; }
2273 }
2274 break;
2275
2276 // FPU instruction with a single memory operand - m16/m32 use the primary
2276 // opcode (+4 for m16), m64 switches to the secondary opcode.
2276 case InstDB::kEncodingFpuM:
2277 if (isign3 == ENC_OPS1(Mem)) {
2278 // Clear compressed displacement before going to EmitX86M.
2279 opcode &= ~uint32_t(Opcode::kCDSHL_Mask);
2280
2281 rmRel = &o0;
2282 if (o0.size() == 2 && commonInfo->hasFlag(InstDB::kFlagFpuM16)) {
2283 opcode += 4;
2284 goto EmitX86M;
2285 }
2286
2287 if (o0.size() == 4 && commonInfo->hasFlag(InstDB::kFlagFpuM32)) {
2288 goto EmitX86M;
2289 }
2290
2291 if (o0.size() == 8 && commonInfo->hasFlag(InstDB::kFlagFpuM64)) {
2292 // Secondary opcode with compressed displacement cleared; /N from O field.
2292 opcode = x86AltOpcodeOf(instInfo) & ~uint32_t(Opcode::kCDSHL_Mask);
2293 opReg = opcode.extractO();
2294 goto EmitX86M;
2295 }
2296 }
2297 break;
2298
2299 // FPU register instruction with a default operand - the no-operand form
2299 // behaves like the register form with st(1) (opcode + 1).
2299 case InstDB::kEncodingFpuRDef:
2300 if (isign3 == 0) {
2301 opcode += 1;
2302 goto EmitFpuOp;
2303 }
2304 ASMJIT_FALLTHROUGH;
2305
2306 // FPU register instruction - the register index is added to the opcode.
2306 case InstDB::kEncodingFpuR:
2307 if (isign3 == ENC_OPS1(Reg)) {
2308 opcode += o0.id();
2309 goto EmitFpuOp;
2310 }
2311 break;
2312
2313 // FNSTSW/FSTSW - the register form only accepts AX and uses the secondary
2313 // opcode; the memory form uses the primary opcode.
2313 case InstDB::kEncodingFpuStsw:
2314 if (isign3 == ENC_OPS1(Reg)) {
2315 if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdAx))
2316 goto InvalidInstruction;
2317
2318 opcode = x86AltOpcodeOf(instInfo);
2319 goto EmitFpuOp;
2320 }
2321
2322 if (isign3 == ENC_OPS1(Mem)) {
2323 // Clear compressed displacement before going to EmitX86M.
2324 opcode &= ~uint32_t(Opcode::kCDSHL_Mask);
2325
2326 rmRel = &o0;
2327 goto EmitX86M;
2328 }
2329 break;
2330
2331 // ------------------------------------------------------------------------
2332 // [Ext]
2333 // ------------------------------------------------------------------------
2334
2335 // PEXTRW - register destination uses the primary opcode; memory destination
2335 // requires the secondary (SSE4.1) opcode. The 0x66 prefix selects XMM source.
2335 case InstDB::kEncodingExtPextrw:
2336 if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
2337 opcode.add66hIf(Reg::isXmm(o1));
2338
2339 immValue = o2.as<Imm>().value();
2340 immSize = 1;
2341
2342 opReg = o0.id();
2343 rbReg = o1.id();
2344 goto EmitX86R;
2345 }
2346
2347 if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
2348 // Secondary opcode of 'pextrw' instruction (SSE4.1).
2349 opcode = x86AltOpcodeOf(instInfo);
2350 opcode.add66hIf(Reg::isXmm(o1));
2351
2352 immValue = o2.as<Imm>().value();
2353 immSize = 1;
2354
2355 opReg = o1.id();
2356 rmRel = &o0;
2357 goto EmitX86M;
2358 }
2359 break;
2360
2361 // Extract instructions - the destination is the r/m operand (o0) and the
2361 // source vector register goes into the reg field (o1); imm8 selects the lane.
2361 case InstDB::kEncodingExtExtract:
2362 if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
2363 opcode.add66hIf(Reg::isXmm(o1));
2364
2365 immValue = o2.as<Imm>().value();
2366 immSize = 1;
2367
2368 opReg = o1.id();
2369 rbReg = o0.id();
2370 goto EmitX86R;
2371 }
2372
2373 if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
2374 opcode.add66hIf(Reg::isXmm(o1));
2375
2376 immValue = o2.as<Imm>().value();
2377 immSize = 1;
2378
2379 opReg = o1.id();
2380 rmRel = &o0;
2381 goto EmitX86M;
2382 }
2383 break;
2384
2385 // Generic move - primary opcode loads (reg <- reg/mem), secondary opcode
2385 // stores (mem/reg <- reg) when the instruction provides one.
2385 case InstDB::kEncodingExtMov:
2386 // GP|MM|XMM <- GP|MM|XMM
2387 if (isign3 == ENC_OPS2(Reg, Reg)) {
2388 opReg = o0.id();
2389 rbReg = o1.id();
2390
2391 // ModMR form is honored only when a secondary opcode exists.
2391 if (!(options & Inst::kOptionModMR) || !instInfo->_altOpcodeIndex)
2392 goto EmitX86R;
2393
2394 opcode = x86AltOpcodeOf(instInfo);
2395 std::swap(opReg, rbReg);
2396 goto EmitX86R;
2397 }
2398
2399 // GP|MM|XMM <- Mem
2400 if (isign3 == ENC_OPS2(Reg, Mem)) {
2401 opReg = o0.id();
2402 rmRel = &o1;
2403 goto EmitX86M;
2404 }
2405
2406 // The following instruction uses the secondary opcode.
2407 opcode = x86AltOpcodeOf(instInfo);
2408
2409 // Mem <- GP|MM|XMM
2410 if (isign3 == ENC_OPS2(Mem, Reg)) {
2411 opReg = o1.id();
2412 rmRel = &o0;
2413 goto EmitX86M;
2414 }
2415 break;
2416
2417 // MOVBE - byte operands are rejected; the operand-size/REX.W prefix is
2417 // derived from the register operand's size. Load uses the primary opcode,
2417 // store uses the secondary opcode.
2417 case InstDB::kEncodingExtMovbe:
2418 if (isign3 == ENC_OPS2(Reg, Mem)) {
2419 if (o0.size() == 1)
2420 goto InvalidInstruction;
2421
2422 opcode.addPrefixBySize(o0.size());
2423 opReg = o0.id();
2424 rmRel = &o1;
2425 goto EmitX86M;
2426 }
2427
2428 // The following instruction uses the secondary opcode.
2429 opcode = x86AltOpcodeOf(instInfo);
2430
2431 if (isign3 == ENC_OPS2(Mem, Reg)) {
2432 if (o1.size() == 1)
2433 goto InvalidInstruction;
2434
2435 opcode.addPrefixBySize(o1.size());
2436 opReg = o1.id();
2437 rmRel = &o0;
2438 goto EmitX86M;
2439 }
2440 break;
2441
2442 // MOVD - primary opcode moves into MM/XMM, secondary opcode moves out of it.
2442 // The 0x66 prefix selects the XMM form. Also reached from kEncodingExtMovq
2442 // with the W bit pre-set (64-bit promotion), which is why only kW survives
2442 // the opcode reset below.
2442 case InstDB::kEncodingExtMovd:
2443 CaseExtMovd:
2444 opReg = o0.id();
2445 opcode.add66hIf(Reg::isXmm(o0));
2446
2447 // MM/XMM <- Gp
2448 if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o1)) {
2449 rbReg = o1.id();
2450 goto EmitX86R;
2451 }
2452
2453 // MM/XMM <- Mem
2454 if (isign3 == ENC_OPS2(Reg, Mem)) {
2455 rmRel = &o1;
2456 goto EmitX86M;
2457 }
2458
2459 // The following instructions use the secondary opcode.
2460 opcode &= Opcode::kW;
2461 opcode |= x86AltOpcodeOf(instInfo);
2462 opReg = o1.id();
2463 opcode.add66hIf(Reg::isXmm(o1));
2464
2465 // GP <- MM/XMM
2466 if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o0)) {
2467 rbReg = o0.id();
2468 goto EmitX86R;
2469 }
2470
2471 // Mem <- MM/XMM
2472 if (isign3 == ENC_OPS2(Mem, Reg)) {
2473 rmRel = &o0;
2474 goto EmitX86M;
2475 }
2476 break;
2477
2478 // MOVQ - the MM forms use 0F 6F/7F, the XMM forms use F3 0F 7E (load) and
2478 // 66 0F D6 (store). Any other combination (GP involved) falls through to
2478 // the MOVD path with the W bit set (64-bit promotion).
2478 case InstDB::kEncodingExtMovq:
2479 if (isign3 == ENC_OPS2(Reg, Reg)) {
2480 opReg = o0.id();
2481 rbReg = o1.id();
2482
2483 // MM <- MM
2484 if (Reg::isMm(o0) && Reg::isMm(o1)) {
2485 opcode = Opcode::k000F00 | 0x6F;
2486
2487 if (!(options & Inst::kOptionModMR))
2488 goto EmitX86R;
2489
2490 // ModMR form: 0x6F -> 0x7F (store direction), operands swapped.
2490 opcode += 0x10;
2491 std::swap(opReg, rbReg);
2492 goto EmitX86R;
2493 }
2494
2495 // XMM <- XMM
2496 if (Reg::isXmm(o0) && Reg::isXmm(o1)) {
2497 opcode = Opcode::kF30F00 | 0x7E;
2498
2499 if (!(options & Inst::kOptionModMR))
2500 goto EmitX86R;
2501
2502 // ModMR form uses the store opcode with operands swapped.
2502 opcode = Opcode::k660F00 | 0xD6;
2503 std::swap(opReg, rbReg);
2504 goto EmitX86R;
2505 }
2506 }
2507
2508 if (isign3 == ENC_OPS2(Reg, Mem)) {
2509 opReg = o0.id();
2510 rmRel = &o1;
2511
2512 // MM <- Mem
2513 if (Reg::isMm(o0)) {
2514 opcode = Opcode::k000F00 | 0x6F;
2515 goto EmitX86M;
2516 }
2517
2518 // XMM <- Mem
2519 if (Reg::isXmm(o0)) {
2520 opcode = Opcode::kF30F00 | 0x7E;
2521 goto EmitX86M;
2522 }
2523 }
2524
2525 if (isign3 == ENC_OPS2(Mem, Reg)) {
2526 opReg = o1.id();
2527 rmRel = &o0;
2528
2529 // Mem <- MM
2530 if (Reg::isMm(o1)) {
2531 opcode = Opcode::k000F00 | 0x7F;
2532 goto EmitX86M;
2533 }
2534
2535 // Mem <- XMM
2536 if (Reg::isXmm(o1)) {
2537 opcode = Opcode::k660F00 | 0xD6;
2538 goto EmitX86M;
2539 }
2540 }
2541
2542 // MOVQ in other case is simply a MOVD instruction promoted to 64-bit.
2543 opcode |= Opcode::kW;
2544 goto CaseExtMovd;
2545
2546 case InstDB::kEncodingExtRm_XMM0:
2547 if (ASMJIT_UNLIKELY(!o2.isNone() && !Reg::isXmm(o2, 0)))
2548 goto InvalidInstruction;
2549
2550 isign3 &= 0x3F;
2551 goto CaseExtRm;
2552
2553 case InstDB::kEncodingExtRm_ZDI:
2554 if (ASMJIT_UNLIKELY(!o2.isNone() && !x86IsImplicitMem(o2, Gp::kIdDi)))
2555 goto InvalidInstruction;
2556
2557 isign3 &= 0x3F;
2558 goto CaseExtRm;
2559
2560 case InstDB::kEncodingExtRm_Wx:
2561 opcode.addWIf(Reg::isGpq(o0) || o1.size() == 8);
2562 ASMJIT_FALLTHROUGH;
2563
2564 case InstDB::kEncodingExtRm:
2565 CaseExtRm:
2566 if (isign3 == ENC_OPS2(Reg, Reg)) {
2567 opReg = o0.id();
2568 rbReg = o1.id();
2569 goto EmitX86R;
2570 }
2571
2572 if (isign3 == ENC_OPS2(Reg, Mem)) {
2573 opReg = o0.id();
2574 rmRel = &o1;
2575 goto EmitX86M;
2576 }
2577 break;
2578
2579 case InstDB::kEncodingExtRm_P:
2580 if (isign3 == ENC_OPS2(Reg, Reg)) {
2581 opcode.add66hIf(Reg::isXmm(o0) | Reg::isXmm(o1));
2582
2583 opReg = o0.id();
2584 rbReg = o1.id();
2585 goto EmitX86R;
2586 }
2587
2588 if (isign3 == ENC_OPS2(Reg, Mem)) {
2589 opcode.add66hIf(Reg::isXmm(o0));
2590
2591 opReg = o0.id();
2592 rmRel = &o1;
2593 goto EmitX86M;
2594 }
2595 break;
2596
2597 case InstDB::kEncodingExtRmRi:
2598 if (isign3 == ENC_OPS2(Reg, Reg)) {
2599 opReg = o0.id();
2600 rbReg = o1.id();
2601 goto EmitX86R;
2602 }
2603
2604 if (isign3 == ENC_OPS2(Reg, Mem)) {
2605 opReg = o0.id();
2606 rmRel = &o1;
2607 goto EmitX86M;
2608 }
2609
2610 // The following instruction uses the secondary opcode.
2611 opcode = x86AltOpcodeOf(instInfo);
2612 opReg = opcode.extractO();
2613
2614 if (isign3 == ENC_OPS2(Reg, Imm)) {
2615 immValue = o1.as<Imm>().value();
2616 immSize = 1;
2617
2618 rbReg = o0.id();
2619 goto EmitX86R;
2620 }
2621 break;
2622
2623 case InstDB::kEncodingExtRmRi_P:
2624 if (isign3 == ENC_OPS2(Reg, Reg)) {
2625 opcode.add66hIf(Reg::isXmm(o0) | Reg::isXmm(o1));
2626
2627 opReg = o0.id();
2628 rbReg = o1.id();
2629 goto EmitX86R;
2630 }
2631
2632 if (isign3 == ENC_OPS2(Reg, Mem)) {
2633 opcode.add66hIf(Reg::isXmm(o0));
2634
2635 opReg = o0.id();
2636 rmRel = &o1;
2637 goto EmitX86M;
2638 }
2639
2640 // The following instruction uses the secondary opcode.
2641 opcode = x86AltOpcodeOf(instInfo);
2642 opReg = opcode.extractO();
2643
2644 if (isign3 == ENC_OPS2(Reg, Imm)) {
2645 opcode.add66hIf(Reg::isXmm(o0));
2646
2647 immValue = o1.as<Imm>().value();
2648 immSize = 1;
2649
2650 rbReg = o0.id();
2651 goto EmitX86R;
2652 }
2653 break;
2654
2655 case InstDB::kEncodingExtRmi:
2656 immValue = o2.as<Imm>().value();
2657 immSize = 1;
2658
2659 if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
2660 opReg = o0.id();
2661 rbReg = o1.id();
2662 goto EmitX86R;
2663 }
2664
2665 if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
2666 opReg = o0.id();
2667 rmRel = &o1;
2668 goto EmitX86M;
2669 }
2670 break;
2671
2672 case InstDB::kEncodingExtRmi_P:
2673 immValue = o2.as<Imm>().value();
2674 immSize = 1;
2675
2676 if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
2677 opcode.add66hIf(Reg::isXmm(o0) | Reg::isXmm(o1));
2678
2679 opReg = o0.id();
2680 rbReg = o1.id();
2681 goto EmitX86R;
2682 }
2683
2684 if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
2685 opcode.add66hIf(Reg::isXmm(o0));
2686
2687 opReg = o0.id();
2688 rmRel = &o1;
2689 goto EmitX86M;
2690 }
2691 break;
2692
2693 // ------------------------------------------------------------------------
2694 // [Extrq / Insertq (SSE4A)]
2695 // ------------------------------------------------------------------------
2696
2697 case InstDB::kEncodingExtExtrq:
2698 opReg = o0.id();
2699 rbReg = o1.id();
2700
2701 if (isign3 == ENC_OPS2(Reg, Reg))
2702 goto EmitX86R;
2703
2704 // The following instruction uses the secondary opcode.
2705 opcode = x86AltOpcodeOf(instInfo);
2706
2707 if (isign3 == ENC_OPS3(Reg, Imm, Imm)) {
2708 immValue = (uint32_t(o1.as<Imm>().valueAs<uint8_t>()) ) +
2709 (uint32_t(o2.as<Imm>().valueAs<uint8_t>()) << 8) ;
2710 immSize = 2;
2711
2712 rbReg = opcode.extractO();
2713 goto EmitX86R;
2714 }
2715 break;
2716
2717 case InstDB::kEncodingExtInsertq: {
2718 const Operand_& o3 = opExt[EmitterUtils::kOp3];
2719 const uint32_t isign4 = isign3 + (o3.opType() << 9);
2720
2721 opReg = o0.id();
2722 rbReg = o1.id();
2723
2724 if (isign4 == ENC_OPS2(Reg, Reg))
2725 goto EmitX86R;
2726
2727 // The following instruction uses the secondary opcode.
2728 opcode = x86AltOpcodeOf(instInfo);
2729
2730 if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) {
2731 immValue = (uint32_t(o2.as<Imm>().valueAs<uint8_t>()) ) +
2732 (uint32_t(o3.as<Imm>().valueAs<uint8_t>()) << 8) ;
2733 immSize = 2;
2734 goto EmitX86R;
2735 }
2736 break;
2737 }
2738
2739 // ------------------------------------------------------------------------
2740 // [3dNow]
2741 // ------------------------------------------------------------------------
2742
2743 case InstDB::kEncodingExt3dNow:
2744 // Every 3dNow instruction starts with 0x0F0F and the actual opcode is
2745 // stored as 8-bit immediate.
2746 immValue = opcode.v & 0xFFu;
2747 immSize = 1;
2748
2749 // Rewrite the opcode to the common 0F 0F escape; the real operation byte
2749 // is appended after the operands via the immediate emitted above.
2749 opcode = Opcode::k000F00 | 0x0F;
2750 opReg = o0.id();
2751
2752 if (isign3 == ENC_OPS2(Reg, Reg)) {
2753 rbReg = o1.id();
2754 goto EmitX86R;
2755 }
2756
2757 if (isign3 == ENC_OPS2(Reg, Mem)) {
2758 rmRel = &o1;
2759 goto EmitX86M;
2760 }
2761 break;
2762
2763 // ------------------------------------------------------------------------
2764 // [VEX/EVEX]
2765 // ------------------------------------------------------------------------
2766
2767 case InstDB::kEncodingVexOp:
2768 goto EmitVexEvexOp;
2769
2770 // KMOV - mask register moves. Primary opcode covers 'k <- k/mem'; the
2770 // secondary opcode is the GP variant ('k <- gp', and +1 for 'gp <- k');
2770 // +1 on the primary opcode selects the store (MR) direction.
2770 case InstDB::kEncodingVexKmov:
2771 if (isign3 == ENC_OPS2(Reg, Reg)) {
2772 opReg = o0.id();
2773 rbReg = o1.id();
2774
2775 // Form 'k, reg'.
2776 if (Reg::isGp(o1)) {
2777 opcode = x86AltOpcodeOf(instInfo);
2778 goto EmitVexEvexR;
2779 }
2780
2781 // Form 'reg, k'.
2782 if (Reg::isGp(o0)) {
2783 opcode = x86AltOpcodeOf(instInfo) + 1;
2784 goto EmitVexEvexR;
2785 }
2786
2787 // Form 'k, k'.
2788 if (!(options & Inst::kOptionModMR))
2789 goto EmitVexEvexR;
2790
2791 // ModMR form - store opcode (+1) with operands swapped.
2791 opcode.add(1);
2792 std::swap(opReg, rbReg);
2793 goto EmitVexEvexR;
2794 }
2795
2796 if (isign3 == ENC_OPS2(Reg, Mem)) {
2797 opReg = o0.id();
2798 rmRel = &o1;
2799
2800 goto EmitVexEvexM;
2801 }
2802
2803 if (isign3 == ENC_OPS2(Mem, Reg)) {
2804 // Store form - primary opcode + 1.
2804 opcode.add(1);
2805 opReg = o1.id();
2806 rmRel = &o0;
2807 goto EmitVexEvexM;
2808 }
2809 break;
2810
2811 case InstDB::kEncodingVexR_Wx:
2812 if (isign3 == ENC_OPS1(Reg)) {
2813 rbReg = o0.id();
2814 opcode.addWIf(o0.as<Reg>().isGpq());
2815 goto EmitVexEvexR;
2816 }
2817 break;
2818
2819 case InstDB::kEncodingVexM:
2820 if (isign3 == ENC_OPS1(Mem)) {
2821 rmRel = &o0;
2822 goto EmitVexEvexM;
2823 }
2824 break;
2825
2826 case InstDB::kEncodingVexM_VM:
2827 if (isign3 == ENC_OPS1(Mem)) {
2828 opcode |= x86OpcodeLByVMem(o0);
2829 rmRel = &o0;
2830 goto EmitVexEvexM;
2831 }
2832 break;
2833
2834 case InstDB::kEncodingVexMr_Lx:
2835 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
2836
2837 if (isign3 == ENC_OPS2(Reg, Reg)) {
2838 opReg = o1.id();
2839 rbReg = o0.id();
2840 goto EmitVexEvexR;
2841 }
2842
2843 if (isign3 == ENC_OPS2(Mem, Reg)) {
2844 opReg = o1.id();
2845 rmRel = &o0;
2846 goto EmitVexEvexM;
2847 }
2848 break;
2849
2850 case InstDB::kEncodingVexMr_VM:
2851 if (isign3 == ENC_OPS2(Mem, Reg)) {
2852 opcode |= Support::max(x86OpcodeLByVMem(o0), x86OpcodeLBySize(o1.size()));
2853
2854 opReg = o1.id();
2855 rmRel = &o0;
2856 goto EmitVexEvexM;
2857 }
2858 break;
2859
2860 case InstDB::kEncodingVexMri_Lx:
2861 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
2862 ASMJIT_FALLTHROUGH;
2863
2864 case InstDB::kEncodingVexMri:
2865 immValue = o2.as<Imm>().value();
2866 immSize = 1;
2867
2868 if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
2869 opReg = o1.id();
2870 rbReg = o0.id();
2871 goto EmitVexEvexR;
2872 }
2873
2874 if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
2875 opReg = o1.id();
2876 rmRel = &o0;
2877 goto EmitVexEvexM;
2878 }
2879 break;
2880
2881 case InstDB::kEncodingVexRm_ZDI:
2882 if (ASMJIT_UNLIKELY(!o2.isNone() && !x86IsImplicitMem(o2, Gp::kIdDi)))
2883 goto InvalidInstruction;
2884
2885 isign3 &= 0x3F;
2886 goto CaseVexRm;
2887
2888 case InstDB::kEncodingVexRm_Wx:
2889 opcode.addWIf(Reg::isGpq(o0) | Reg::isGpq(o1));
2890 goto CaseVexRm;
2891
2892 case InstDB::kEncodingVexRm_Lx_Bcst:
2893 if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o1.as<Reg>())) {
2894 opcode = x86AltOpcodeOf(instInfo) | x86OpcodeLBySize(o0.size() | o1.size());
2895 opReg = o0.id();
2896 rbReg = o1.id();
2897 goto EmitVexEvexR;
2898 }
2899 ASMJIT_FALLTHROUGH;
2900
2901 case InstDB::kEncodingVexRm_Lx:
2902 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
2903 ASMJIT_FALLTHROUGH;
2904
2905 case InstDB::kEncodingVexRm:
2906 CaseVexRm:
2907 if (isign3 == ENC_OPS2(Reg, Reg)) {
2908 opReg = o0.id();
2909 rbReg = o1.id();
2910 goto EmitVexEvexR;
2911 }
2912
2913 if (isign3 == ENC_OPS2(Reg, Mem)) {
2914 opReg = o0.id();
2915 rmRel = &o1;
2916 goto EmitVexEvexM;
2917 }
2918 break;
2919
2920 case InstDB::kEncodingVexRm_VM:
2921 if (isign3 == ENC_OPS2(Reg, Mem)) {
2922 opcode |= Support::max(x86OpcodeLByVMem(o1), x86OpcodeLBySize(o0.size()));
2923 opReg = o0.id();
2924 rmRel = &o1;
2925 goto EmitVexEvexM;
2926 }
2927 break;
2928
2929 case InstDB::kEncodingVexRm_T1_4X: {
2930 const Operand_& o3 = opExt[EmitterUtils::kOp3];
2931 const Operand_& o4 = opExt[EmitterUtils::kOp4];
2932 const Operand_& o5 = opExt[EmitterUtils::kOp5];
2933
2934 if (Reg::isZmm(o0) && Reg::isZmm(o1) && Reg::isZmm(o2) && Reg::isZmm(o3) && Reg::isZmm(o4) && o5.isMem()) {
2935 // Registers [o1, o2, o3, o4] must start aligned and must be consecutive.
2936 uint32_t i1 = o1.id();
2937 uint32_t i2 = o2.id();
2938 uint32_t i3 = o3.id();
2939 uint32_t i4 = o4.id();
2940
2941 if (ASMJIT_UNLIKELY((i1 & 0x3) != 0 || i2 != i1 + 1 || i3 != i1 + 2 || i4 != i1 + 3))
2942 goto NotConsecutiveRegs;
2943
2944 opReg = o0.id();
2945 rmRel = &o5;
2946 goto EmitVexEvexM;
2947 }
2948 break;
2949 }
2950
2951 case InstDB::kEncodingVexRmi_Wx:
2952 opcode.addWIf(Reg::isGpq(o0) | Reg::isGpq(o1));
2953 goto CaseVexRmi;
2954
2955 case InstDB::kEncodingVexRmi_Lx:
2956 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
2957 ASMJIT_FALLTHROUGH;
2958
2959 case InstDB::kEncodingVexRmi:
2960 CaseVexRmi:
2961 immValue = o2.as<Imm>().value();
2962 immSize = 1;
2963
2964 if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
2965 opReg = o0.id();
2966 rbReg = o1.id();
2967 goto EmitVexEvexR;
2968 }
2969
2970 if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
2971 opReg = o0.id();
2972 rmRel = &o1;
2973 goto EmitVexEvexM;
2974 }
2975 break;
2976
2977 case InstDB::kEncodingVexRvm:
2978 CaseVexRvm:
2979 if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
2980 CaseVexRvm_R:
2981 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
2982 rbReg = o2.id();
2983 goto EmitVexEvexR;
2984 }
2985
2986 if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
2987 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
2988 rmRel = &o2;
2989 goto EmitVexEvexM;
2990 }
2991 break;
2992
2993 case InstDB::kEncodingVexRvm_ZDX_Wx: {
2994 const Operand_& o3 = opExt[EmitterUtils::kOp3];
2995 if (ASMJIT_UNLIKELY(!o3.isNone() && !Reg::isGp(o3, Gp::kIdDx)))
2996 goto InvalidInstruction;
2997 ASMJIT_FALLTHROUGH;
2998 }
2999
3000 case InstDB::kEncodingVexRvm_Wx: {
3001 opcode.addWIf(Reg::isGpq(o0) | (o2.size() == 8));
3002 goto CaseVexRvm;
3003 }
3004
3005 case InstDB::kEncodingVexRvm_Lx: {
3006 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3007 goto CaseVexRvm;
3008 }
3009
3010 case InstDB::kEncodingVexRvmr_Lx: {
3011 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3012 ASMJIT_FALLTHROUGH;
3013 }
3014
3015 case InstDB::kEncodingVexRvmr: {
3016 const Operand_& o3 = opExt[EmitterUtils::kOp3];
3017 const uint32_t isign4 = isign3 + (o3.opType() << 9);
3018
3019 immValue = o3.id() << 4;
3020 immSize = 1;
3021
3022 if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
3023 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3024 rbReg = o2.id();
3025 goto EmitVexEvexR;
3026 }
3027
3028 if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
3029 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3030 rmRel = &o2;
3031 goto EmitVexEvexM;
3032 }
3033 break;
3034 }
3035
3036 case InstDB::kEncodingVexRvmi_Lx:
3037 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3038 ASMJIT_FALLTHROUGH;
3039
3040 case InstDB::kEncodingVexRvmi: {
3041 const Operand_& o3 = opExt[EmitterUtils::kOp3];
3042 const uint32_t isign4 = isign3 + (o3.opType() << 9);
3043
3044 immValue = o3.as<Imm>().value();
3045 immSize = 1;
3046
3047 if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) {
3048 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3049 rbReg = o2.id();
3050 goto EmitVexEvexR;
3051 }
3052
3053 if (isign4 == ENC_OPS4(Reg, Reg, Mem, Imm)) {
3054 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3055 rmRel = &o2;
3056 goto EmitVexEvexM;
3057 }
3058 break;
3059 }
3060
3061 case InstDB::kEncodingVexRmv_Wx:
3062 opcode.addWIf(Reg::isGpq(o0) | Reg::isGpq(o2));
3063 ASMJIT_FALLTHROUGH;
3064
3065 case InstDB::kEncodingVexRmv:
3066 if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3067 opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3068 rbReg = o1.id();
3069 goto EmitVexEvexR;
3070 }
3071
3072 if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
3073 opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3074 rmRel = &o1;
3075 goto EmitVexEvexM;
3076 }
3077 break;
3078
3079 case InstDB::kEncodingVexRmvRm_VM:
3080 if (isign3 == ENC_OPS2(Reg, Mem)) {
3081 opcode = x86AltOpcodeOf(instInfo);
3082 opcode |= Support::max(x86OpcodeLByVMem(o1), x86OpcodeLBySize(o0.size()));
3083
3084 opReg = o0.id();
3085 rmRel = &o1;
3086 goto EmitVexEvexM;
3087 }
3088
3089 ASMJIT_FALLTHROUGH;
3090
3091 case InstDB::kEncodingVexRmv_VM:
3092 if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
3093 opcode |= Support::max(x86OpcodeLByVMem(o1), x86OpcodeLBySize(o0.size() | o2.size()));
3094
3095 opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3096 rmRel = &o1;
3097 goto EmitVexEvexM;
3098 }
3099 break;
3100
3101
3102 case InstDB::kEncodingVexRmvi: {
3103 const Operand_& o3 = opExt[EmitterUtils::kOp3];
3104 const uint32_t isign4 = isign3 + (o3.opType() << 9);
3105
3106 immValue = o3.as<Imm>().value();
3107 immSize = 1;
3108
3109 if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) {
3110 opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3111 rbReg = o1.id();
3112 goto EmitVexEvexR;
3113 }
3114
3115 if (isign4 == ENC_OPS4(Reg, Mem, Reg, Imm)) {
3116 opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3117 rmRel = &o1;
3118 goto EmitVexEvexM;
3119 }
3120 break;
3121 }
3122
3123 case InstDB::kEncodingVexMovdMovq:
3124 if (isign3 == ENC_OPS2(Reg, Reg)) {
3125 if (Reg::isGp(o0)) {
3126 opcode = x86AltOpcodeOf(instInfo);
3127 opcode.addWBySize(o0.size());
3128 opReg = o1.id();
3129 rbReg = o0.id();
3130 goto EmitVexEvexR;
3131 }
3132
3133 if (Reg::isGp(o1)) {
3134 opcode.addWBySize(o1.size());
3135 opReg = o0.id();
3136 rbReg = o1.id();
3137 goto EmitVexEvexR;
3138 }
3139
3140 // If this is a 'W' version (movq) then allow also vmovq 'xmm|xmm' form.
3141 if (opcode & Opcode::kEvex_W_1) {
3142 opcode &= ~(Opcode::kPP_VEXMask | Opcode::kMM_Mask | 0xFF);
3143 opcode |= (Opcode::kF30F00 | 0x7E);
3144
3145 opReg = o0.id();
3146 rbReg = o1.id();
3147 goto EmitVexEvexR;
3148 }
3149 }
3150
3151 if (isign3 == ENC_OPS2(Reg, Mem)) {
3152 if (opcode & Opcode::kEvex_W_1) {
3153 opcode &= ~(Opcode::kPP_VEXMask | Opcode::kMM_Mask | 0xFF);
3154 opcode |= (Opcode::kF30F00 | 0x7E);
3155 }
3156
3157 opReg = o0.id();
3158 rmRel = &o1;
3159 goto EmitVexEvexM;
3160 }
3161
3162 // The following instruction uses the secondary opcode.
3163 opcode = x86AltOpcodeOf(instInfo);
3164
3165 if (isign3 == ENC_OPS2(Mem, Reg)) {
3166 if (opcode & Opcode::kEvex_W_1) {
3167 opcode &= ~(Opcode::kPP_VEXMask | Opcode::kMM_Mask | 0xFF);
3168 opcode |= (Opcode::k660F00 | 0xD6);
3169 }
3170
3171 opReg = o1.id();
3172 rmRel = &o0;
3173 goto EmitVexEvexM;
3174 }
3175 break;
3176
3177 case InstDB::kEncodingVexRmMr_Lx:
3178 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3179 ASMJIT_FALLTHROUGH;
3180
3181 case InstDB::kEncodingVexRmMr:
3182 if (isign3 == ENC_OPS2(Reg, Reg)) {
3183 opReg = o0.id();
3184 rbReg = o1.id();
3185 goto EmitVexEvexR;
3186 }
3187
3188 if (isign3 == ENC_OPS2(Reg, Mem)) {
3189 opReg = o0.id();
3190 rmRel = &o1;
3191 goto EmitVexEvexM;
3192 }
3193
3194 // The following instruction uses the secondary opcode.
3195 opcode &= Opcode::kLL_Mask;
3196 opcode |= x86AltOpcodeOf(instInfo);
3197
3198 if (isign3 == ENC_OPS2(Mem, Reg)) {
3199 opReg = o1.id();
3200 rmRel = &o0;
3201 goto EmitVexEvexM;
3202 }
3203 break;
3204
3205 case InstDB::kEncodingVexRvmRmv:
3206 if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3207 opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3208 rbReg = o1.id();
3209
3210 if (!(options & Inst::kOptionModMR))
3211 goto EmitVexEvexR;
3212
3213 opcode.addW();
3214 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3215 rbReg = o2.id();
3216 goto EmitVexEvexR;
3217 }
3218
3219 if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
3220 opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3221 rmRel = &o1;
3222 goto EmitVexEvexM;
3223 }
3224
3225 if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
3226 opcode.addW();
3227 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3228 rmRel = &o2;
3229 goto EmitVexEvexM;
3230 }
3231 break;
3232
3233 case InstDB::kEncodingVexRvmRmi_Lx:
3234 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3235 ASMJIT_FALLTHROUGH;
3236
3237 case InstDB::kEncodingVexRvmRmi:
3238 if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3239 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3240 rbReg = o2.id();
3241 goto EmitVexEvexR;
3242 }
3243
3244 if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
3245 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3246 rmRel = &o2;
3247 goto EmitVexEvexM;
3248 }
3249
3250 // The following instructions use the secondary opcode.
3251 opcode &= Opcode::kLL_Mask;
3252 opcode |= x86AltOpcodeOf(instInfo);
3253
3254 immValue = o2.as<Imm>().value();
3255 immSize = 1;
3256
3257 if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
3258 opReg = o0.id();
3259 rbReg = o1.id();
3260 goto EmitVexEvexR;
3261 }
3262
3263 if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
3264 opReg = o0.id();
3265 rmRel = &o1;
3266 goto EmitVexEvexM;
3267 }
3268 break;
3269
3270 case InstDB::kEncodingVexRvmRmvRmi:
3271 if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3272 opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3273 rbReg = o1.id();
3274
3275 if (!(options & Inst::kOptionModMR))
3276 goto EmitVexEvexR;
3277
3278 opcode.addW();
3279 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3280 rbReg = o2.id();
3281 goto EmitVexEvexR;
3282 }
3283
3284 if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
3285 opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3286 rmRel = &o1;
3287 goto EmitVexEvexM;
3288 }
3289
3290 if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
3291 opcode.addW();
3292 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3293 rmRel = &o2;
3294 goto EmitVexEvexM;
3295 }
3296
3297 // The following instructions use the secondary opcode.
3298 opcode = x86AltOpcodeOf(instInfo);
3299
3300 immValue = o2.as<Imm>().value();
3301 immSize = 1;
3302
3303 if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
3304 opReg = o0.id();
3305 rbReg = o1.id();
3306 goto EmitVexEvexR;
3307 }
3308
3309 if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
3310 opReg = o0.id();
3311 rmRel = &o1;
3312 goto EmitVexEvexM;
3313 }
3314 break;
3315
3316 case InstDB::kEncodingVexRvmMr:
3317 if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3318 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3319 rbReg = o2.id();
3320 goto EmitVexEvexR;
3321 }
3322
3323 if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
3324 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3325 rmRel = &o2;
3326 goto EmitVexEvexM;
3327 }
3328
3329 // The following instructions use the secondary opcode.
3330 opcode = x86AltOpcodeOf(instInfo);
3331
3332 if (isign3 == ENC_OPS2(Reg, Reg)) {
3333 opReg = o1.id();
3334 rbReg = o0.id();
3335 goto EmitVexEvexR;
3336 }
3337
3338 if (isign3 == ENC_OPS2(Mem, Reg)) {
3339 opReg = o1.id();
3340 rmRel = &o0;
3341 goto EmitVexEvexM;
3342 }
3343 break;
3344
3345 case InstDB::kEncodingVexRvmMvr_Lx:
3346 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3347 ASMJIT_FALLTHROUGH;
3348
3349 case InstDB::kEncodingVexRvmMvr:
3350 if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3351 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3352 rbReg = o2.id();
3353 goto EmitVexEvexR;
3354 }
3355
3356 if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
3357 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3358 rmRel = &o2;
3359 goto EmitVexEvexM;
3360 }
3361
3362 // The following instruction uses the secondary opcode.
3363 opcode &= Opcode::kLL_Mask;
3364 opcode |= x86AltOpcodeOf(instInfo);
3365
3366 if (isign3 == ENC_OPS3(Mem, Reg, Reg)) {
3367 opReg = x86PackRegAndVvvvv(o2.id(), o1.id());
3368 rmRel = &o0;
3369 goto EmitVexEvexM;
3370 }
3371 break;
3372
3373 case InstDB::kEncodingVexRvmVmi_Lx:
3374 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3375 ASMJIT_FALLTHROUGH;
3376
3377 case InstDB::kEncodingVexRvmVmi:
3378 if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3379 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3380 rbReg = o2.id();
3381 goto EmitVexEvexR;
3382 }
3383
3384 if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
3385 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3386 rmRel = &o2;
3387 goto EmitVexEvexM;
3388 }
3389
3390 // The following instruction uses the secondary opcode.
3391 opcode &= Opcode::kLL_Mask;
3392 opcode |= x86AltOpcodeOf(instInfo);
3393 opReg = opcode.extractO();
3394
3395 immValue = o2.as<Imm>().value();
3396 immSize = 1;
3397
3398 if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
3399 opReg = x86PackRegAndVvvvv(opReg, o0.id());
3400 rbReg = o1.id();
3401 goto EmitVexEvexR;
3402 }
3403
3404 if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
3405 opReg = x86PackRegAndVvvvv(opReg, o0.id());
3406 rmRel = &o1;
3407 goto EmitVexEvexM;
3408 }
3409 break;
3410
3411 case InstDB::kEncodingVexVm_Wx:
3412 opcode.addWIf(Reg::isGpq(o0) | Reg::isGpq(o1));
3413 ASMJIT_FALLTHROUGH;
3414
3415 case InstDB::kEncodingVexVm:
3416 if (isign3 == ENC_OPS2(Reg, Reg)) {
3417 opReg = x86PackRegAndVvvvv(opReg, o0.id());
3418 rbReg = o1.id();
3419 goto EmitVexEvexR;
3420 }
3421
3422 if (isign3 == ENC_OPS2(Reg, Mem)) {
3423 opReg = x86PackRegAndVvvvv(opReg, o0.id());
3424 rmRel = &o1;
3425 goto EmitVexEvexM;
3426 }
3427 break;
3428
3429 case InstDB::kEncodingVexEvexVmi_Lx:
3430 if (isign3 == ENC_OPS3(Reg, Mem, Imm))
3431 opcode |= Opcode::kMM_ForceEvex;
3432 ASMJIT_FALLTHROUGH;
3433
3434 case InstDB::kEncodingVexVmi_Lx:
3435 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3436 ASMJIT_FALLTHROUGH;
3437
3438 case InstDB::kEncodingVexVmi:
3439 immValue = o2.as<Imm>().value();
3440 immSize = 1;
3441
3442 CaseVexVmi_AfterImm:
3443 if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
3444 opReg = x86PackRegAndVvvvv(opReg, o0.id());
3445 rbReg = o1.id();
3446 goto EmitVexEvexR;
3447 }
3448
3449 if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
3450 opReg = x86PackRegAndVvvvv(opReg, o0.id());
3451 rmRel = &o1;
3452 goto EmitVexEvexM;
3453 }
3454 break;
3455
3456 case InstDB::kEncodingVexVmi4_Wx:
3457 opcode.addWIf(Reg::isGpq(o0) || o1.size() == 8);
3458 immValue = o2.as<Imm>().value();
3459 immSize = 4;
3460 goto CaseVexVmi_AfterImm;
3461
3462 case InstDB::kEncodingVexRvrmRvmr_Lx:
3463 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3464 ASMJIT_FALLTHROUGH;
3465
3466 case InstDB::kEncodingVexRvrmRvmr: {
3467 const Operand_& o3 = opExt[EmitterUtils::kOp3];
3468 const uint32_t isign4 = isign3 + (o3.opType() << 9);
3469
3470 if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
3471 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3472 rbReg = o2.id();
3473
3474 immValue = o3.id() << 4;
3475 immSize = 1;
3476 goto EmitVexEvexR;
3477 }
3478
3479 if (isign4 == ENC_OPS4(Reg, Reg, Reg, Mem)) {
3480 opcode.addW();
3481 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3482 rmRel = &o3;
3483
3484 immValue = o2.id() << 4;
3485 immSize = 1;
3486 goto EmitVexEvexM;
3487 }
3488
3489 if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
3490 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3491 rmRel = &o2;
3492
3493 immValue = o3.id() << 4;
3494 immSize = 1;
3495 goto EmitVexEvexM;
3496 }
3497 break;
3498 }
3499
3500 case InstDB::kEncodingVexRvrmiRvmri_Lx: {
3501 const Operand_& o3 = opExt[EmitterUtils::kOp3];
3502 const Operand_& o4 = opExt[EmitterUtils::kOp4];
3503
3504 if (ASMJIT_UNLIKELY(!o4.isImm()))
3505 goto InvalidInstruction;
3506
3507 const uint32_t isign4 = isign3 + (o3.opType() << 9);
3508 opcode |= x86OpcodeLBySize(o0.size() | o1.size() | o2.size() | o3.size());
3509
3510 immValue = o4.as<Imm>().valueAs<uint8_t>() & 0x0F;
3511 immSize = 1;
3512
3513 if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
3514 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3515 rbReg = o2.id();
3516
3517 immValue |= o3.id() << 4;
3518 goto EmitVexEvexR;
3519 }
3520
3521 if (isign4 == ENC_OPS4(Reg, Reg, Reg, Mem)) {
3522 opcode.addW();
3523 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3524 rmRel = &o3;
3525
3526 immValue |= o2.id() << 4;
3527 goto EmitVexEvexM;
3528 }
3529
3530 if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
3531 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3532 rmRel = &o2;
3533
3534 immValue |= o3.id() << 4;
3535 goto EmitVexEvexM;
3536 }
3537 break;
3538 }
3539
3540 case InstDB::kEncodingVexMovssMovsd:
3541 if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3542 goto CaseVexRvm_R;
3543 }
3544
3545 if (isign3 == ENC_OPS2(Reg, Mem)) {
3546 opReg = o0.id();
3547 rmRel = &o1;
3548 goto EmitVexEvexM;
3549 }
3550
3551 if (isign3 == ENC_OPS2(Mem, Reg)) {
3552 opcode = x86AltOpcodeOf(instInfo);
3553 opReg = o1.id();
3554 rmRel = &o0;
3555 goto EmitVexEvexM;
3556 }
3557 break;
3558
3559 // ------------------------------------------------------------------------
3560 // [FMA4]
3561 // ------------------------------------------------------------------------
3562
3563 case InstDB::kEncodingFma4_Lx:
3564 // It's fine to just check the first operand, second is just for sanity.
3565 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3566 ASMJIT_FALLTHROUGH;
3567
3568 case InstDB::kEncodingFma4: {
3569 const Operand_& o3 = opExt[EmitterUtils::kOp3];
3570 const uint32_t isign4 = isign3 + (o3.opType() << 9);
3571
3572 if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
3573 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3574 rbReg = o2.id();
3575
3576 immValue = o3.id() << 4;
3577 immSize = 1;
3578 goto EmitVexEvexR;
3579 }
3580
3581 if (isign4 == ENC_OPS4(Reg, Reg, Reg, Mem)) {
3582 opcode.addW();
3583 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3584 rmRel = &o3;
3585
3586 immValue = o2.id() << 4;
3587 immSize = 1;
3588 goto EmitVexEvexM;
3589 }
3590
3591 if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
3592 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3593 rmRel = &o2;
3594
3595 immValue = o3.id() << 4;
3596 immSize = 1;
3597 goto EmitVexEvexM;
3598 }
3599 break;
3600 }
3601 }
3602
3603 goto InvalidInstruction;
3604
3605 // --------------------------------------------------------------------------
3606 // [Emit - X86]
3607 // --------------------------------------------------------------------------
3608
3609 EmitX86OpMovAbs:
3610 immSize = FastUInt8(registerSize());
3611 writer.emitSegmentOverride(rmRel->as<Mem>().segmentId());
3612
3613 EmitX86Op:
3614 // Emit mandatory instruction prefix.
3615 writer.emitPP(opcode.v);
3616
3617 // Emit REX prefix (64-bit only).
3618 {
3619 uint32_t rex = opcode.extractRex(options);
3620 if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
3621 goto InvalidRexPrefix;
3622 rex &= ~kX86ByteInvalidRex & 0xFF;
3623 writer.emit8If(rex | kX86ByteRex, rex != 0);
3624 }
3625
3626 // Emit instruction opcodes.
3627 writer.emitMMAndOpcode(opcode.v);
3628 writer.emitImmediate(uint64_t(immValue), immSize);
3629 goto EmitDone;
3630
3631 EmitX86OpReg:
3632 // Emit mandatory instruction prefix.
3633 writer.emitPP(opcode.v);
3634
3635 // Emit REX prefix (64-bit only).
3636 {
3637 uint32_t rex = opcode.extractRex(options) | (opReg >> 3); // Rex.B (0x01).
3638 if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
3639 goto InvalidRexPrefix;
3640 rex &= ~kX86ByteInvalidRex & 0xFF;
3641 writer.emit8If(rex | kX86ByteRex, rex != 0);
3642
3643 opReg &= 0x7;
3644 }
3645
3646 // Emit instruction opcodes.
3647 opcode += opReg;
3648 writer.emitMMAndOpcode(opcode.v);
3649 writer.emitImmediate(uint64_t(immValue), immSize);
3650 goto EmitDone;
3651
3652 EmitX86OpImplicitMem:
3653 // NOTE: Don't change the emit order here, it's compatible with KeyStone/LLVM.
3654 rmInfo = x86MemInfo[rmRel->as<Mem>().baseAndIndexTypes()];
3655 if (ASMJIT_UNLIKELY(rmRel->as<Mem>().hasOffset() || (rmInfo & kX86MemInfo_Index)))
3656 goto InvalidInstruction;
3657
3658 // Emit mandatory instruction prefix.
3659 writer.emitPP(opcode.v);
3660
3661 // Emit REX prefix (64-bit only).
3662 {
3663 uint32_t rex = opcode.extractRex(options);
3664 if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
3665 goto InvalidRexPrefix;
3666 rex &= ~kX86ByteInvalidRex & 0xFF;
3667 writer.emit8If(rex | kX86ByteRex, rex != 0);
3668 }
3669
3670 writer.emitSegmentOverride(rmRel->as<Mem>().segmentId());
3671 writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0);
3672
3673 // Emit instruction opcodes.
3674 writer.emitMMAndOpcode(opcode.v);
3675 writer.emitImmediate(uint64_t(immValue), immSize);
3676 goto EmitDone;
3677
3678 EmitX86R:
3679 // Mandatory instruction prefix.
3680 writer.emitPP(opcode.v);
3681
3682 // Rex prefix (64-bit only).
3683 {
3684 uint32_t rex = opcode.extractRex(options) |
3685 ((opReg & 0x08) >> 1) | // REX.R (0x04).
3686 ((rbReg ) >> 3) ; // REX.B (0x01).
3687
3688 if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
3689 goto InvalidRexPrefix;
3690 rex &= ~kX86ByteInvalidRex & 0xFF;
3691 writer.emit8If(rex | kX86ByteRex, rex != 0);
3692
3693 opReg &= 0x07;
3694 rbReg &= 0x07;
3695 }
3696
3697 // Instruction opcodes.
3698 writer.emitMMAndOpcode(opcode.v);
3699 // ModR.
3700 writer.emit8(x86EncodeMod(3, opReg, rbReg));
3701 writer.emitImmediate(uint64_t(immValue), immSize);
3702 goto EmitDone;
3703
3704 EmitX86M:
3705 // `rmRel` operand must be memory.
3706 ASMJIT_ASSERT(rmRel != nullptr);
3707 ASMJIT_ASSERT(rmRel->opType() == Operand::kOpMem);
3708 ASMJIT_ASSERT((opcode & Opcode::kCDSHL_Mask) == 0);
3709
3710 rmInfo = x86MemInfo[rmRel->as<Mem>().baseAndIndexTypes()];
3711 writer.emitSegmentOverride(rmRel->as<Mem>().segmentId());
3712
3713 memOpAOMark = writer.cursor();
3714 writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0);
3715
3716 // Mandatory instruction prefix.
3717 writer.emitPP(opcode.v);
3718
3719 rbReg = rmRel->as<Mem>().baseId();
3720 rxReg = rmRel->as<Mem>().indexId();
3721
3722 // REX prefix (64-bit only).
3723 {
3724 uint32_t rex;
3725
3726 rex = (rbReg >> 3) & 0x01; // REX.B (0x01).
3727 rex |= (rxReg >> 2) & 0x02; // REX.X (0x02).
3728 rex |= (opReg >> 1) & 0x04; // REX.R (0x04).
3729
3730 rex &= rmInfo;
3731 rex |= opcode.extractRex(options);
3732
3733 if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
3734 goto InvalidRexPrefix;
3735 rex &= ~kX86ByteInvalidRex & 0xFF;
3736 writer.emit8If(rex | kX86ByteRex, rex != 0);
3737
3738 opReg &= 0x07;
3739 }
3740
3741 // Instruction opcodes.
3742 writer.emitMMAndOpcode(opcode.v);
3743 // ... Fall through ...
3744
3745 // --------------------------------------------------------------------------
3746 // [Emit - MOD/SIB]
3747 // --------------------------------------------------------------------------
3748
3749 EmitModSib:
3750 if (!(rmInfo & (kX86MemInfo_Index | kX86MemInfo_67H_X86))) {
3751 // ==========|> [BASE + DISP8|DISP32].
3752 if (rmInfo & kX86MemInfo_BaseGp) {
3753 rbReg &= 0x7;
3754 relOffset = rmRel->as<Mem>().offsetLo32();
3755
3756 uint32_t mod = x86EncodeMod(0, opReg, rbReg);
3757 if (rbReg == Gp::kIdSp) {
3758 // [XSP|R12].
3759 if (relOffset == 0) {
3760 writer.emit8(mod);
3761 writer.emit8(x86EncodeSib(0, 4, 4));
3762 }
3763 // [XSP|R12 + DISP8|DISP32].
3764 else {
3765 uint32_t cdShift = (opcode & Opcode::kCDSHL_Mask) >> Opcode::kCDSHL_Shift;
3766 int32_t cdOffset = relOffset >> cdShift;
3767
3768 if (Support::isInt8(cdOffset) && relOffset == int32_t(uint32_t(cdOffset) << cdShift)) {
3769 writer.emit8(mod + 0x40); // <- MOD(1, opReg, rbReg).
3770 writer.emit8(x86EncodeSib(0, 4, 4));
3771 writer.emit8(cdOffset & 0xFF);
3772 }
3773 else {
3774 writer.emit8(mod + 0x80); // <- MOD(2, opReg, rbReg).
3775 writer.emit8(x86EncodeSib(0, 4, 4));
3776 writer.emit32uLE(uint32_t(relOffset));
3777 }
3778 }
3779 }
3780 else if (rbReg != Gp::kIdBp && relOffset == 0) {
3781 // [BASE].
3782 writer.emit8(mod);
3783 }
3784 else {
3785 // [BASE + DISP8|DISP32].
3786 uint32_t cdShift = (opcode & Opcode::kCDSHL_Mask) >> Opcode::kCDSHL_Shift;
3787 int32_t cdOffset = relOffset >> cdShift;
3788
3789 if (Support::isInt8(cdOffset) && relOffset == int32_t(uint32_t(cdOffset) << cdShift)) {
3790 writer.emit8(mod + 0x40);
3791 writer.emit8(cdOffset & 0xFF);
3792 }
3793 else {
3794 writer.emit8(mod + 0x80);
3795 writer.emit32uLE(uint32_t(relOffset));
3796 }
3797 }
3798 }
3799 // ==========|> [ABSOLUTE | DISP32].
3800 else if (!(rmInfo & (kX86MemInfo_BaseLabel | kX86MemInfo_BaseRip))) {
3801 uint32_t addrType = rmRel->as<Mem>().addrType();
3802 relOffset = rmRel->as<Mem>().offsetLo32();
3803
3804 if (is32Bit()) {
3805 // Explicit relative addressing doesn't work in 32-bit mode.
3806 if (ASMJIT_UNLIKELY(addrType == BaseMem::kAddrTypeRel))
3807 goto InvalidAddress;
3808
3809 writer.emit8(x86EncodeMod(0, opReg, 5));
3810 writer.emit32uLE(uint32_t(relOffset));
3811 }
3812 else {
3813 bool isOffsetI32 = rmRel->as<Mem>().offsetHi32() == (relOffset >> 31);
3814 bool isOffsetU32 = rmRel->as<Mem>().offsetHi32() == 0;
3815 uint64_t baseAddress = code()->baseAddress();
3816
3817 // If relative addressing was not explicitly set then we can try to guess.
3818 // By guessing we check some properties of the memory operand and try to
3819 // base the decision on the segment prefix and the address type.
3820 if (addrType == BaseMem::kAddrTypeDefault) {
3821 if (baseAddress == Globals::kNoBaseAddress) {
3822 // Prefer absolute addressing mode if the offset is 32-bit.
3823 addrType = isOffsetI32 || isOffsetU32 ? BaseMem::kAddrTypeAbs
3824 : BaseMem::kAddrTypeRel;
3825 }
3826 else {
3827 // Prefer absolute addressing mode if FS|GS segment override is present.
3828 bool hasFsGs = rmRel->as<Mem>().segmentId() >= SReg::kIdFs;
3829 // Prefer absolute addressing mode if this is LEA with 32-bit immediate.
3830 bool isLea32 = (instId == Inst::kIdLea) && (isOffsetI32 || isOffsetU32);
3831
3832 addrType = hasFsGs || isLea32 ? BaseMem::kAddrTypeAbs
3833 : BaseMem::kAddrTypeRel;
3834 }
3835 }
3836
3837 if (addrType == BaseMem::kAddrTypeRel) {
3838 uint32_t kModRel32Size = 5;
3839 uint64_t virtualOffset = uint64_t(writer.offsetFrom(_bufferData)) + immSize + kModRel32Size;
3840
3841 if (baseAddress == Globals::kNoBaseAddress) {
3842 // Create a new RelocEntry as we cannot calculate the offset right now.
3843 err = _code->newRelocEntry(&re, RelocEntry::kTypeAbsToRel, 4);
3844 if (ASMJIT_UNLIKELY(err))
3845 goto Failed;
3846
3847 writer.emit8(x86EncodeMod(0, opReg, 5));
3848 writer.emit32uLE(0);
3849
3850 re->_sourceSectionId = _section->id();
3851 re->_sourceOffset = offset();
3852 re->_leadingSize = uint8_t(writer.offsetFrom(_bufferPtr) - 4);
3853 re->_trailingSize = uint8_t(immSize);
3854 re->_payload = uint64_t(rmRel->as<Mem>().offset());
3855
3856 writer.emitImmediate(uint64_t(immValue), immSize);
3857 goto EmitDone;
3858 }
3859 else {
3860 uint64_t rip64 = baseAddress + _section->offset() + virtualOffset;
3861 uint64_t rel64 = uint64_t(rmRel->as<Mem>().offset()) - rip64;
3862
3863 if (Support::isInt32(int64_t(rel64))) {
3864 writer.emit8(x86EncodeMod(0, opReg, 5));
3865 writer.emit32uLE(uint32_t(rel64 & 0xFFFFFFFFu));
3866 writer.emitImmediate(uint64_t(immValue), immSize);
3867 goto EmitDone;
3868 }
3869 else {
3870 // We must check the original address type as we have modified
3871 // `addrType`. We failed if the original address type is 'rel'.
3872 if (ASMJIT_UNLIKELY(rmRel->as<Mem>().isRel()))
3873 goto InvalidAddress;
3874 }
3875 }
3876 }
3877
3878 // Handle unsigned 32-bit address that doesn't work with sign extension.
3879 // Consider the following instructions:
3880 //
3881 // 1. lea rax, [-1] - Sign extended to 0xFFFFFFFFFFFFFFFF
3882 // 2. lea rax, [0xFFFFFFFF] - Zero extended to 0x00000000FFFFFFFF
3883 // 3. add rax, [-1] - Sign extended to 0xFFFFFFFFFFFFFFFF
3884 // 4. add rax, [0xFFFFFFFF] - Zero extended to 0x00000000FFFFFFFF
3885 //
3886 // Sign extension is naturally performed by the CPU so we don't have to
3887 // bother, however, zero extension requires address-size override prefix,
3888 // which we probably don't have at this moment. So to make the address
3889 // valid we need to insert it at `memOpAOMark` if it's not already there.
3890 //
3891 // If this is 'lea' instruction then it's possible to remove REX.W part
3892 // from REX prefix (if it's there), which would be one-byte shorter than
3893 // inserting address-size override.
3894 //
3895 // NOTE: If we don't do this then these instructions are unencodable.
3896 if (!isOffsetI32) {
3897 // 64-bit absolute address is unencodable.
3898 if (ASMJIT_UNLIKELY(!isOffsetU32))
3899 goto InvalidAddress64Bit;
3900
3901 // We only patch the existing code if we don't have address-size override.
3902 if (*memOpAOMark != 0x67) {
3903 if (instId == Inst::kIdLea) {
3904 // LEA: Remove REX.W, if present. This is easy as we know that 'lea'
3905 // doesn't use any PP prefix so if REX prefix was emitted it would be
3906 // at `memOpAOMark`.
3907 uint32_t rex = *memOpAOMark;
3908 if (rex & kX86ByteRex) {
3909 rex &= (~kX86ByteRexW) & 0xFF;
3910 *memOpAOMark = uint8_t(rex);
3911
3912 // We can remove the REX prefix completely if it was not forced.
3913 if (rex == kX86ByteRex && !(options & Inst::kOptionRex))
3914 writer.remove8(memOpAOMark);
3915 }
3916 }
3917 else {
3918 // Any other instruction: Insert address-size override prefix.
3919 writer.insert8(memOpAOMark, 0x67);
3920 }
3921 }
3922 }
3923
3924 // Emit 32-bit absolute address.
3925 writer.emit8(x86EncodeMod(0, opReg, 4));
3926 writer.emit8(x86EncodeSib(0, 4, 5));
3927 writer.emit32uLE(uint32_t(relOffset));
3928 }
3929 }
3930 // ==========|> [LABEL|RIP + DISP32]
3931 else {
3932 writer.emit8(x86EncodeMod(0, opReg, 5));
3933
3934 if (is32Bit()) {
3935 EmitModSib_LabelRip_X86:
3936 if (ASMJIT_UNLIKELY(_code->_relocations.willGrow(_code->allocator()) != kErrorOk))
3937 goto OutOfMemory;
3938
3939 relOffset = rmRel->as<Mem>().offsetLo32();
3940 if (rmInfo & kX86MemInfo_BaseLabel) {
3941 // [LABEL->ABS].
3942 label = _code->labelEntry(rmRel->as<Mem>().baseId());
3943 if (ASMJIT_UNLIKELY(!label))
3944 goto InvalidLabel;
3945
3946 err = _code->newRelocEntry(&re, RelocEntry::kTypeRelToAbs, 4);
3947 if (ASMJIT_UNLIKELY(err))
3948 goto Failed;
3949
3950 re->_sourceSectionId = _section->id();
3951 re->_sourceOffset = offset();
3952 re->_leadingSize = uint8_t(writer.offsetFrom(_bufferPtr));
3953 re->_trailingSize = uint8_t(immSize);
3954 re->_payload = uint64_t(int64_t(relOffset));
3955
3956 if (label->isBound()) {
3957 // Label bound to the current section.
3958 re->_payload += label->offset();
3959 re->_targetSectionId = label->section()->id();
3960 writer.emit32uLE(0);
3961 }
3962 else {
3963 // Non-bound label or label bound to a different section.
3964 relOffset = -4 - immSize;
3965 relSize = 4;
3966 goto EmitRel;
3967 }
3968 }
3969 else {
3970 // [RIP->ABS].
3971 err = _code->newRelocEntry(&re, RelocEntry::kTypeRelToAbs, 4);
3972 if (ASMJIT_UNLIKELY(err))
3973 goto Failed;
3974
3975 re->_sourceSectionId = _section->id();
3976 re->_targetSectionId = _section->id();
3977 re->_sourceOffset = offset();
3978 re->_leadingSize = uint8_t(writer.offsetFrom(_bufferPtr));
3979 re->_trailingSize = uint8_t(immSize);
3980 re->_payload = re->_sourceOffset + re->_leadingSize + 4 + re->_trailingSize + uint64_t(int64_t(relOffset));
3981
3982 writer.emit32uLE(0);
3983 }
3984 }
3985 else {
3986 relOffset = rmRel->as<Mem>().offsetLo32();
3987 if (rmInfo & kX86MemInfo_BaseLabel) {
3988 // [RIP].
3989 label = _code->labelEntry(rmRel->as<Mem>().baseId());
3990 if (ASMJIT_UNLIKELY(!label))
3991 goto InvalidLabel;
3992
3993 relOffset -= (4 + immSize);
3994 if (label->isBoundTo(_section)) {
3995 // Label bound to the current section.
3996 relOffset += int32_t(label->offset() - writer.offsetFrom(_bufferData));
3997 writer.emit32uLE(uint32_t(relOffset));
3998 }
3999 else {
4000 // Non-bound label or label bound to a different section.
4001 relSize = 4;
4002 goto EmitRel;
4003 }
4004 }
4005 else {
4006 // [RIP].
4007 writer.emit32uLE(uint32_t(relOffset));
4008 }
4009 }
4010 }
4011 }
4012 else if (!(rmInfo & kX86MemInfo_67H_X86)) {
4013 // ESP|RSP can't be used as INDEX in pure SIB mode, however, VSIB mode
4014 // allows XMM4|YMM4|ZMM4 (that's why the check is before the label).
4015 if (ASMJIT_UNLIKELY(rxReg == Gp::kIdSp))
4016 goto InvalidAddressIndex;
4017
4018 EmitModVSib:
4019 rxReg &= 0x7;
4020
4021 // ==========|> [BASE + INDEX + DISP8|DISP32].
4022 if (rmInfo & kX86MemInfo_BaseGp) {
4023 rbReg &= 0x7;
4024 relOffset = rmRel->as<Mem>().offsetLo32();
4025
4026 uint32_t mod = x86EncodeMod(0, opReg, 4);
4027 uint32_t sib = x86EncodeSib(rmRel->as<Mem>().shift(), rxReg, rbReg);
4028
4029 if (relOffset == 0 && rbReg != Gp::kIdBp) {
4030 // [BASE + INDEX << SHIFT].
4031 writer.emit8(mod);
4032 writer.emit8(sib);
4033 }
4034 else {
4035 uint32_t cdShift = (opcode & Opcode::kCDSHL_Mask) >> Opcode::kCDSHL_Shift;
4036 int32_t cdOffset = relOffset >> cdShift;
4037
4038 if (Support::isInt8(cdOffset) && relOffset == int32_t(uint32_t(cdOffset) << cdShift)) {
4039 // [BASE + INDEX << SHIFT + DISP8].
4040 writer.emit8(mod + 0x40); // <- MOD(1, opReg, 4).
4041 writer.emit8(sib);
4042 writer.emit8(uint32_t(cdOffset));
4043 }
4044 else {
4045 // [BASE + INDEX << SHIFT + DISP32].
4046 writer.emit8(mod + 0x80); // <- MOD(2, opReg, 4).
4047 writer.emit8(sib);
4048 writer.emit32uLE(uint32_t(relOffset));
4049 }
4050 }
4051 }
4052 // ==========|> [INDEX + DISP32].
4053 else if (!(rmInfo & (kX86MemInfo_BaseLabel | kX86MemInfo_BaseRip))) {
4054 // [INDEX << SHIFT + DISP32].
4055 writer.emit8(x86EncodeMod(0, opReg, 4));
4056 writer.emit8(x86EncodeSib(rmRel->as<Mem>().shift(), rxReg, 5));
4057
4058 relOffset = rmRel->as<Mem>().offsetLo32();
4059 writer.emit32uLE(uint32_t(relOffset));
4060 }
4061 // ==========|> [LABEL|RIP + INDEX + DISP32].
4062 else {
4063 if (is32Bit()) {
4064 writer.emit8(x86EncodeMod(0, opReg, 4));
4065 writer.emit8(x86EncodeSib(rmRel->as<Mem>().shift(), rxReg, 5));
4066 goto EmitModSib_LabelRip_X86;
4067 }
4068 else {
4069 // NOTE: This also handles VSIB+RIP, which is not allowed in 64-bit mode.
4070 goto InvalidAddress;
4071 }
4072 }
4073 }
4074 else {
4075 // 16-bit address mode (32-bit mode with 67 override prefix).
4076 relOffset = (int32_t(rmRel->as<Mem>().offsetLo32()) << 16) >> 16;
4077
4078 // NOTE: 16-bit addresses don't use SIB byte and their encoding differs. We
4079 // use a table-based approach to calculate the proper MOD byte as it's easier.
4080 // Also, not all BASE [+ INDEX] combinations are supported in 16-bit mode, so
4081 // this may fail.
4082 const uint32_t kBaseGpIdx = (kX86MemInfo_BaseGp | kX86MemInfo_Index);
4083
4084 if (rmInfo & kBaseGpIdx) {
4085 // ==========|> [BASE + INDEX + DISP16].
4086 uint32_t mod;
4087
4088 rbReg &= 0x7;
4089 rxReg &= 0x7;
4090
4091 if ((rmInfo & kBaseGpIdx) == kBaseGpIdx) {
4092 uint32_t shf = rmRel->as<Mem>().shift();
4093 if (ASMJIT_UNLIKELY(shf != 0))
4094 goto InvalidAddress;
4095 mod = x86Mod16BaseIndexTable[(rbReg << 3) + rxReg];
4096 }
4097 else {
4098 if (rmInfo & kX86MemInfo_Index)
4099 rbReg = rxReg;
4100 mod = x86Mod16BaseTable[rbReg];
4101 }
4102
4103 if (ASMJIT_UNLIKELY(mod == 0xFF))
4104 goto InvalidAddress;
4105
4106 mod += opReg << 3;
4107 if (relOffset == 0 && mod != 0x06) {
4108 writer.emit8(mod);
4109 }
4110 else if (Support::isInt8(relOffset)) {
4111 writer.emit8(mod + 0x40);
4112 writer.emit8(uint32_t(relOffset));
4113 }
4114 else {
4115 writer.emit8(mod + 0x80);
4116 writer.emit16uLE(uint32_t(relOffset));
4117 }
4118 }
4119 else {
4120 // Not supported in 16-bit addresses.
4121 if (rmInfo & (kX86MemInfo_BaseRip | kX86MemInfo_BaseLabel))
4122 goto InvalidAddress;
4123
4124 // ==========|> [DISP16].
4125 writer.emit8(opReg | 0x06);
4126 writer.emit16uLE(uint32_t(relOffset));
4127 }
4128 }
4129
4130 writer.emitImmediate(uint64_t(immValue), immSize);
4131 goto EmitDone;
4132
4133 // --------------------------------------------------------------------------
4134 // [Emit - FPU]
4135 // --------------------------------------------------------------------------
4136
4137 EmitFpuOp:
4138 // Mandatory instruction prefix.
4139 writer.emitPP(opcode.v);
4140
4141 // FPU instructions consist of two opcodes.
4142 writer.emit8(opcode.v >> Opcode::kFPU_2B_Shift);
4143 writer.emit8(opcode.v);
4144 goto EmitDone;
4145
4146 // --------------------------------------------------------------------------
4147 // [Emit - VEX / EVEX]
4148 // --------------------------------------------------------------------------
4149
4150 EmitVexEvexOp:
4151 {
4152 // These don't use immediate.
4153 ASMJIT_ASSERT(immSize == 0);
4154
4155 // Only 'vzeroall' and 'vzeroupper' instructions use this encoding, they
4156 // don't define 'W' to be '1' so we can just check the 'mmmmm' field. Both
4157 // functions can encode by using VEX2 prefix so VEX3 is basically only used
4158 // when specified as instruction option.
4159 ASMJIT_ASSERT((opcode & Opcode::kW) == 0);
4160
4161 uint32_t x = ((opcode & Opcode::kMM_Mask ) >> (Opcode::kMM_Shift )) |
4162 ((opcode & Opcode::kLL_Mask ) >> (Opcode::kLL_Shift - 10)) |
4163 ((opcode & Opcode::kPP_VEXMask ) >> (Opcode::kPP_Shift - 8)) |
4164 ((options & Inst::kOptionVex3 ) >> (Opcode::kMM_Shift )) ;
4165 if (x & 0x04u) {
4166 x = (x & (0x4 ^ 0xFFFF)) << 8; // [00000000|00000Lpp|0000m0mm|00000000].
4167 x ^= (kX86ByteVex3) | // [........|00000Lpp|0000m0mm|__VEX3__].
4168 (0x07u << 13) | // [........|00000Lpp|1110m0mm|__VEX3__].
4169 (0x0Fu << 19) | // [........|01111Lpp|1110m0mm|__VEX3__].
4170 (opcode << 24) ; // [_OPCODE_|01111Lpp|1110m0mm|__VEX3__].
4171
4172 writer.emit32uLE(x);
4173 goto EmitDone;
4174 }
4175 else {
4176 x = ((x >> 8) ^ x) ^ 0xF9;
4177 writer.emit8(kX86ByteVex2);
4178 writer.emit8(x);
4179 writer.emit8(opcode.v);
4180 goto EmitDone;
4181 }
4182 }
4183
4184 EmitVexEvexR:
4185 {
4186 // Construct `x` - a complete EVEX|VEX prefix.
4187 uint32_t x = ((opReg << 4) & 0xF980u) | // [........|........|Vvvvv..R|R.......].
4188 ((rbReg << 2) & 0x0060u) | // [........|........|........|.BB.....].
4189 (opcode.extractLLMM(options)) | // [........|.LL.....|Vvvvv..R|RBBmmmmm].
4190 (_extraReg.id() << 16); // [........|.LL..aaa|Vvvvv..R|RBBmmmmm].
4191 opReg &= 0x7;
4192
4193 // Handle AVX512 options by a single branch.
4194 const uint32_t kAvx512Options = Inst::kOptionZMask | Inst::kOptionER | Inst::kOptionSAE;
4195 if (options & kAvx512Options) {
4196 uint32_t kBcstMask = 0x1 << 20;
4197 uint32_t kLLMask10 = 0x2 << 21;
4198 uint32_t kLLMask11 = 0x3 << 21;
4199
4200 // Designed to be easily encodable so the position must be exact.
4201 // The {rz-sae} is encoded as {11}, so it should match the mask.
4202 ASMJIT_ASSERT(Inst::kOptionRZ_SAE == kLLMask11);
4203
4204 x |= options & Inst::kOptionZMask; // [........|zLLb.aaa|Vvvvv..R|RBBmmmmm].
4205
4206 // Support embedded-rounding {er} and suppress-all-exceptions {sae}.
4207 if (options & (Inst::kOptionER | Inst::kOptionSAE)) {
4208 // Embedded rounding is only encodable if the instruction is either
4209 // scalar or it's a 512-bit operation as the {er} rounding predicate
4210 // collides with LL part of the instruction.
4211 if ((x & kLLMask11) != kLLMask10) {
4212 // Ok, so LL is not 10, thus the instruction must be scalar.
4213 // Scalar instructions don't support broadcast so if this
4214 // instruction supports it {er} nor {sae} would be encodable.
4215 if (ASMJIT_UNLIKELY(commonInfo->hasAvx512B()))
4216 goto InvalidEROrSAE;
4217 }
4218
4219 if (options & Inst::kOptionER) {
4220 if (ASMJIT_UNLIKELY(!commonInfo->hasAvx512ER()))
4221 goto InvalidEROrSAE;
4222
4223 x &=~kLLMask11; // [........|.00..aaa|Vvvvv..R|RBBmmmmm].
4224 x |= kBcstMask | (options & kLLMask11); // [........|.LLb.aaa|Vvvvv..R|RBBmmmmm].
4225 }
4226 else {
4227 if (ASMJIT_UNLIKELY(!commonInfo->hasAvx512SAE()))
4228 goto InvalidEROrSAE;
4229
4230 x |= kBcstMask; // [........|.LLb.aaa|Vvvvv..R|RBBmmmmm].
4231 }
4232 }
4233 }
4234
4235 // Check if EVEX is required by checking bits in `x` : [........|xx.x.xxx|x......x|.x.x....].
4236 if (x & 0x00D78150u) {
4237 uint32_t y = ((x << 4) & 0x00080000u) | // [........|...bV...|........|........].
4238 ((x >> 4) & 0x00000010u) ; // [........|...bV...|........|...R....].
4239 x = (x & 0x00FF78E3u) | y; // [........|zLLbVaaa|0vvvv000|RBBR00mm].
4240 x = x << 8; // [zLLbVaaa|0vvvv000|RBBR00mm|00000000].
4241 x |= (opcode >> kVSHR_W ) & 0x00800000u; // [zLLbVaaa|Wvvvv000|RBBR00mm|00000000].
4242 x |= (opcode >> kVSHR_PP_EW) & 0x00830000u; // [zLLbVaaa|Wvvvv0pp|RBBR00mm|00000000] (added PP and EVEX.W).
4243 // _ ____ ____
4244 x ^= 0x087CF000u | kX86ByteEvex; // [zLLbVaaa|Wvvvv1pp|RBBR00mm|01100010].
4245
4246 writer.emit32uLE(x);
4247 writer.emit8(opcode.v);
4248
4249 rbReg &= 0x7;
4250 writer.emit8(x86EncodeMod(3, opReg, rbReg));
4251 writer.emitImmByteOrDWord(uint64_t(immValue), immSize);
4252 goto EmitDone;
4253 }
4254
4255 // Not EVEX, prepare `x` for VEX2 or VEX3: x = [........|00L00000|0vvvv000|R0B0mmmm].
4256 x |= ((opcode >> (kVSHR_W + 8)) & 0x8000u) | // [00000000|00L00000|Wvvvv000|R0B0mmmm].
4257 ((opcode >> (kVSHR_PP + 8)) & 0x0300u) | // [00000000|00L00000|0vvvv0pp|R0B0mmmm].
4258 ((x >> 11 ) & 0x0400u) ; // [00000000|00L00000|WvvvvLpp|R0B0mmmm].
4259
4260 // Check if VEX3 is required / forced: [........|........|x.......|..x..x..].
4261 if (x & 0x0008024u) {
4262 uint32_t xorMsk = x86VEXPrefix[x & 0xF] | (opcode << 24);
4263
4264 // Clear 'FORCE-VEX3' bit and all high bits.
4265 x = (x & (0x4 ^ 0xFFFF)) << 8; // [00000000|WvvvvLpp|R0B0m0mm|00000000].
4266 // ____ _ _
4267 x ^= xorMsk; // [_OPCODE_|WvvvvLpp|R1Bmmmmm|VEX3|XOP].
4268 writer.emit32uLE(x);
4269
4270 rbReg &= 0x7;
4271 writer.emit8(x86EncodeMod(3, opReg, rbReg));
4272 writer.emitImmByteOrDWord(uint64_t(immValue), immSize);
4273 goto EmitDone;
4274 }
4275 else {
4276 // 'mmmmm' must be '00001'.
4277 ASMJIT_ASSERT((x & 0x1F) == 0x01);
4278
4279 x = ((x >> 8) ^ x) ^ 0xF9;
4280 writer.emit8(kX86ByteVex2);
4281 writer.emit8(x);
4282 writer.emit8(opcode.v);
4283
4284 rbReg &= 0x7;
4285 writer.emit8(x86EncodeMod(3, opReg, rbReg));
4286 writer.emitImmByteOrDWord(uint64_t(immValue), immSize);
4287 goto EmitDone;
4288 }
4289 }
4290
4291 EmitVexEvexM:
4292 ASMJIT_ASSERT(rmRel != nullptr);
4293 ASMJIT_ASSERT(rmRel->opType() == Operand::kOpMem);
4294
4295 rmInfo = x86MemInfo[rmRel->as<Mem>().baseAndIndexTypes()];
4296 writer.emitSegmentOverride(rmRel->as<Mem>().segmentId());
4297
4298 memOpAOMark = writer.cursor();
4299 writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0);
4300
4301 rbReg = rmRel->as<Mem>().hasBaseReg() ? rmRel->as<Mem>().baseId() : uint32_t(0);
4302 rxReg = rmRel->as<Mem>().hasIndexReg() ? rmRel->as<Mem>().indexId() : uint32_t(0);
4303
4304 {
4305 uint32_t broadcastBit = uint32_t(rmRel->as<Mem>().hasBroadcast());
4306
4307 // Construct `x` - a complete EVEX|VEX prefix.
4308 uint32_t x = ((opReg << 4) & 0x0000F980u) | // [........|........|Vvvvv..R|R.......].
4309 ((rxReg << 3) & 0x00000040u) | // [........|........|........|.X......].
4310 ((rxReg << 15) & 0x00080000u) | // [........|....X...|........|........].
4311 ((rbReg << 2) & 0x00000020u) | // [........|........|........|..B.....].
4312 opcode.extractLLMM(options) | // [........|.LL.X...|Vvvvv..R|RXBmmmmm].
4313 (_extraReg.id() << 16) | // [........|.LL.Xaaa|Vvvvv..R|RXBmmmmm].
4314 (broadcastBit << 20) ; // [........|.LLbXaaa|Vvvvv..R|RXBmmmmm].
4315 opReg &= 0x07u;
4316
4317 // Mark invalid VEX (force EVEX) case: // [@.......|.LLbXaaa|Vvvvv..R|RXBmmmmm].
4318 x |= (~commonInfo->flags() & InstDB::kFlagVex) << (31 - Support::constCtz(InstDB::kFlagVex));
4319
4320 // Handle AVX512 options by a single branch.
4321 const uint32_t kAvx512Options = Inst::kOptionZMask |
4322 Inst::kOptionER |
4323 Inst::kOptionSAE ;
4324 if (options & kAvx512Options) {
4325 // {er} and {sae} are both invalid if memory operand is used.
4326 if (ASMJIT_UNLIKELY(options & (Inst::kOptionER | Inst::kOptionSAE)))
4327 goto InvalidEROrSAE;
4328
4329 x |= options & (Inst::kOptionZMask); // [@.......|zLLbXaaa|Vvvvv..R|RXBmmmmm].
4330 }
4331
4332 // Check if EVEX is required by checking bits in `x` : [@.......|xx.xxxxx|x......x|...x....].
4333 if (x & 0x80DF8110u) {
4334 uint32_t y = ((x << 4) & 0x00080000u) | // [@.......|....V...|........|........].
4335 ((x >> 4) & 0x00000010u) ; // [@.......|....V...|........|...R....].
4336 x = (x & 0x00FF78E3u) | y; // [........|zLLbVaaa|0vvvv000|RXBR00mm].
4337 x = x << 8; // [zLLbVaaa|0vvvv000|RBBR00mm|00000000].
4338 x |= (opcode >> kVSHR_W ) & 0x00800000u; // [zLLbVaaa|Wvvvv000|RBBR00mm|00000000].
4339 x |= (opcode >> kVSHR_PP_EW) & 0x00830000u; // [zLLbVaaa|Wvvvv0pp|RBBR00mm|00000000] (added PP and EVEX.W).
4340 // _ ____ ____
4341 x ^= 0x087CF000u | kX86ByteEvex; // [zLLbVaaa|Wvvvv1pp|RBBR00mm|01100010].
4342
4343 writer.emit32uLE(x);
4344 writer.emit8(opcode.v);
4345
4346 if (x & 0x10000000u) {
4347 // Broadcast, change the compressed displacement scale to either x4 (SHL 2) or x8 (SHL 3)
4348 // depending on instruction's W. If 'W' is 1 'SHL' must be 3, otherwise it must be 2.
4349 opcode &=~uint32_t(Opcode::kCDSHL_Mask);
4350 opcode |= ((x & 0x00800000u) ? 3u : 2u) << Opcode::kCDSHL_Shift;
4351 }
4352 else {
4353 // Add the compressed displacement 'SHF' to the opcode based on 'TTWLL'.
4354 // The index to `x86CDisp8SHL` is composed as `CDTT[4:3] | W[2] | LL[1:0]`.
4355 uint32_t TTWLL = ((opcode >> (Opcode::kCDTT_Shift - 3)) & 0x18) +
4356 ((opcode >> (Opcode::kW_Shift - 2)) & 0x04) +
4357 ((x >> 29) & 0x3);
4358 opcode += x86CDisp8SHL[TTWLL];
4359 }
4360 }
4361 else {
4362 // Not EVEX, prepare `x` for VEX2 or VEX3: x = [........|00L00000|0vvvv000|RXB0mmmm].
4363 x |= ((opcode >> (kVSHR_W + 8)) & 0x8000u) | // [00000000|00L00000|Wvvvv000|RXB0mmmm].
4364 ((opcode >> (kVSHR_PP + 8)) & 0x0300u) | // [00000000|00L00000|Wvvvv0pp|RXB0mmmm].
4365 ((x >> 11 ) & 0x0400u) ; // [00000000|00L00000|WvvvvLpp|RXB0mmmm].
4366
4367 // Clear a possible CDisp specified by EVEX.
4368 opcode &= ~Opcode::kCDSHL_Mask;
4369
4370 // Check if VEX3 is required / forced: [........|........|x.......|.xx..x..].
4371 if (x & 0x0008064u) {
4372 uint32_t xorMsk = x86VEXPrefix[x & 0xF] | (opcode << 24);
4373
4374 // Clear 'FORCE-VEX3' bit and all high bits.
4375 x = (x & (0x4 ^ 0xFFFF)) << 8; // [00000000|WvvvvLpp|RXB0m0mm|00000000].
4376 // ____ ___
4377 x ^= xorMsk; // [_OPCODE_|WvvvvLpp|RXBmmmmm|VEX3_XOP].
4378 writer.emit32uLE(x);
4379 }
4380 else {
4381 // 'mmmmm' must be '00001'.
4382 ASMJIT_ASSERT((x & 0x1F) == 0x01);
4383
4384 x = ((x >> 8) ^ x) ^ 0xF9;
4385 writer.emit8(kX86ByteVex2);
4386 writer.emit8(x);
4387 writer.emit8(opcode.v);
4388 }
4389 }
4390 }
4391
4392 // MOD|SIB address.
4393 if (!commonInfo->hasFlag(InstDB::kFlagVsib))
4394 goto EmitModSib;
4395
4396 // MOD|VSIB address without INDEX is invalid.
4397 if (rmInfo & kX86MemInfo_Index)
4398 goto EmitModVSib;
4399 goto InvalidInstruction;
4400
4401 // --------------------------------------------------------------------------
4402 // [Emit - Jmp/Jcc/Call]
4403 // --------------------------------------------------------------------------
4404
4405 EmitJmpCall:
4406 {
4407 // Emit REX prefix if asked for (64-bit only).
4408 uint32_t rex = opcode.extractRex(options);
4409 if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
4410 goto InvalidRexPrefix;
4411 rex &= ~kX86ByteInvalidRex & 0xFF;
4412 writer.emit8If(rex | kX86ByteRex, rex != 0);
4413
4414 uint64_t ip = uint64_t(writer.offsetFrom(_bufferData));
4415 uint32_t rel32 = 0;
4416 uint32_t opCode8 = x86AltOpcodeOf(instInfo);
4417
4418 uint32_t inst8Size = 1 + 1; // OPCODE + REL8 .
4419 uint32_t inst32Size = 1 + 4; // [PREFIX] OPCODE + REL32.
4420
4421 // Jcc instructions with 32-bit displacement use 0x0F prefix,
4422 // other instructions don't. No other prefixes are used by X86.
4423 ASMJIT_ASSERT((opCode8 & Opcode::kMM_Mask) == 0);
4424 ASMJIT_ASSERT((opcode & Opcode::kMM_Mask) == 0 ||
4425 (opcode & Opcode::kMM_Mask) == Opcode::kMM_0F);
4426
4427 // Only one of these should be used at the same time.
4428 inst32Size += uint32_t(opReg != 0);
4429 inst32Size += uint32_t((opcode & Opcode::kMM_Mask) == Opcode::kMM_0F);
4430
4431 if (rmRel->isLabel()) {
4432 label = _code->labelEntry(rmRel->as<Label>());
4433 if (ASMJIT_UNLIKELY(!label))
4434 goto InvalidLabel;
4435
4436 if (label->isBoundTo(_section)) {
4437 // Label bound to the current section.
4438 rel32 = uint32_t((label->offset() - ip - inst32Size) & 0xFFFFFFFFu);
4439 goto EmitJmpCallRel;
4440 }
4441 else {
4442 // Non-bound label or label bound to a different section.
4443 if (opCode8 && (!opcode.v || (options & Inst::kOptionShortForm))) {
4444 writer.emit8(opCode8);
4445
4446 // Record DISP8 (non-bound label).
4447 relOffset = -1;
4448 relSize = 1;
4449 goto EmitRel;
4450 }
4451 else {
4452 // Refuse also 'short' prefix, if specified.
4453 if (ASMJIT_UNLIKELY(!opcode.v || (options & Inst::kOptionShortForm) != 0))
4454 goto InvalidDisplacement;
4455
4456 writer.emit8If(0x0F, (opcode & Opcode::kMM_Mask) != 0);// Emit 0F prefix.
4457 writer.emit8(opcode.v); // Emit opcode.
4458 writer.emit8If(x86EncodeMod(3, opReg, 0), opReg != 0); // Emit MOD.
4459
4460 // Record DISP32 (non-bound label).
4461 relOffset = -4;
4462 relSize = 4;
4463 goto EmitRel;
4464 }
4465 }
4466 }
4467
4468 if (rmRel->isImm()) {
4469 uint64_t baseAddress = code()->baseAddress();
4470 uint64_t jumpAddress = rmRel->as<Imm>().valueAs<uint64_t>();
4471
4472 // If the base-address is known calculate a relative displacement and
4473 // check if it fits in 32 bits (which is always true in 32-bit mode).
4474 // Emit relative displacement as it was a bound label if all checks are ok.
4475 if (baseAddress != Globals::kNoBaseAddress) {
4476 uint64_t rel64 = jumpAddress - (ip + baseAddress) - inst32Size;
4477 if (Environment::is32Bit(arch()) || Support::isInt32(int64_t(rel64))) {
4478 rel32 = uint32_t(rel64 & 0xFFFFFFFFu);
4479 goto EmitJmpCallRel;
4480 }
4481 else {
4482 // Relative displacement exceeds 32-bits - relocator can only
4483 // insert trampoline for jmp/call, but not for jcc/jecxz.
4484 if (ASMJIT_UNLIKELY(!x86IsJmpOrCall(instId)))
4485 goto InvalidDisplacement;
4486 }
4487 }
4488
4489 err = _code->newRelocEntry(&re, RelocEntry::kTypeAbsToRel, 0);
4490 if (ASMJIT_UNLIKELY(err))
4491 goto Failed;
4492
4493 re->_sourceOffset = offset();
4494 re->_sourceSectionId = _section->id();
4495 re->_payload = jumpAddress;
4496
4497 if (ASMJIT_LIKELY(opcode.v)) {
4498 // 64-bit: Emit REX prefix so the instruction can be patched later.
4499 // REX prefix does nothing if not patched, but allows to patch the
4500 // instruction to use MOD/M and to point to a memory where the final
4501 // 64-bit address is stored.
4502 if (Environment::is64Bit(arch()) && x86IsJmpOrCall(instId)) {
4503 if (!rex)
4504 writer.emit8(kX86ByteRex);
4505
4506 err = _code->addAddressToAddressTable(jumpAddress);
4507 if (ASMJIT_UNLIKELY(err))
4508 goto Failed;
4509
4510 re->_relocType = RelocEntry::kTypeX64AddressEntry;
4511 }
4512
4513 writer.emit8If(0x0F, (opcode & Opcode::kMM_Mask) != 0); // Emit 0F prefix.
4514 writer.emit8(opcode.v); // Emit opcode.
4515 writer.emit8If(x86EncodeMod(3, opReg, 0), opReg != 0); // Emit MOD.
4516 writer.emit32uLE(0); // Emit DISP32.
4517
4518 re->_valueSize = 4;
4519 re->_leadingSize = uint8_t(writer.offsetFrom(_bufferPtr) - 4);
4520 re->_trailingSize = uint8_t(immSize);
4521 }
4522 else {
4523 writer.emit8(opCode8); // Emit opcode.
4524 writer.emit8(0); // Emit DISP8 (zero).
4525
4526 re->_valueSize = 1;
4527 re->_leadingSize = uint8_t(writer.offsetFrom(_bufferPtr) - 1);
4528 re->_trailingSize = uint8_t(immSize);
4529 }
4530 goto EmitDone;
4531 }
4532
4533 // Not Label|Imm -> Invalid.
4534 goto InvalidInstruction;
4535
4536 // Emit jmp/call with relative displacement known at assembly-time. Decide
4537 // between 8-bit and 32-bit displacement encoding. Some instructions only
4538 // allow either 8-bit or 32-bit encoding, others allow both encodings.
4539 EmitJmpCallRel:
4540 if (Support::isInt8(int32_t(rel32 + inst32Size - inst8Size)) && opCode8 && !(options & Inst::kOptionLongForm)) {
4541 options |= Inst::kOptionShortForm;
4542 writer.emit8(opCode8); // Emit opcode
4543 writer.emit8(rel32 + inst32Size - inst8Size); // Emit DISP8.
4544 goto EmitDone;
4545 }
4546 else {
4547 if (ASMJIT_UNLIKELY(!opcode.v || (options & Inst::kOptionShortForm) != 0))
4548 goto InvalidDisplacement;
4549
4550 options &= ~Inst::kOptionShortForm;
4551 writer.emit8If(0x0F, (opcode & Opcode::kMM_Mask) != 0); // Emit 0x0F prefix.
4552 writer.emit8(opcode.v); // Emit Opcode.
4553 writer.emit8If(x86EncodeMod(3, opReg, 0), opReg != 0); // Emit MOD.
4554 writer.emit32uLE(rel32); // Emit DISP32.
4555 goto EmitDone;
4556 }
4557 }
4558
4559 // --------------------------------------------------------------------------
4560 // [Emit - Relative]
4561 // --------------------------------------------------------------------------
4562
4563 EmitRel:
4564 {
4565 ASMJIT_ASSERT(relSize == 1 || relSize == 4);
4566
4567 // Chain with label.
4568 size_t offset = size_t(writer.offsetFrom(_bufferData));
4569 LabelLink* link = _code->newLabelLink(label, _section->id(), offset, relOffset);
4570
4571 if (ASMJIT_UNLIKELY(!link))
4572 goto OutOfMemory;
4573
4574 if (re)
4575 link->relocId = re->id();
4576
4577 // Emit label size as dummy data.
4578 if (relSize == 1)
4579 writer.emit8(0x01);
4580 else // if (relSize == 4)
4581 writer.emit32uLE(0x04040404);
4582 }
4583 writer.emitImmediate(uint64_t(immValue), immSize);
4584
4585 // --------------------------------------------------------------------------
4586 // [Done]
4587 // --------------------------------------------------------------------------
4588
4589 EmitDone:
4590 if (ASMJIT_UNLIKELY(options & Inst::kOptionReserved)) {
4591 #ifndef ASMJIT_NO_LOGGING
4592 if (_logger)
4593 EmitterUtils::logInstructionEmitted(this, instId, options, o0, o1, o2, opExt, relSize, immSize, writer.cursor());
4594 #endif
4595 }
4596
4597 resetExtraReg();
4598 resetInstOptions();
4599 resetInlineComment();
4600
4601 writer.done(this);
4602 return kErrorOk;
4603
4604 // --------------------------------------------------------------------------
4605 // [Error Cases]
4606 // --------------------------------------------------------------------------
4607
4608 #define ERROR_HANDLER(ERROR) \
4609 ERROR: \
4610 err = DebugUtils::errored(kError##ERROR); \
4611 goto Failed;
4612
4613 ERROR_HANDLER(OutOfMemory)
4614 ERROR_HANDLER(InvalidLabel)
4615 ERROR_HANDLER(InvalidInstruction)
4616 ERROR_HANDLER(InvalidLockPrefix)
4617 ERROR_HANDLER(InvalidXAcquirePrefix)
4618 ERROR_HANDLER(InvalidXReleasePrefix)
4619 ERROR_HANDLER(InvalidRepPrefix)
4620 ERROR_HANDLER(InvalidRexPrefix)
4621 ERROR_HANDLER(InvalidEROrSAE)
4622 ERROR_HANDLER(InvalidAddress)
4623 ERROR_HANDLER(InvalidAddressIndex)
4624 ERROR_HANDLER(InvalidAddress64Bit)
4625 ERROR_HANDLER(InvalidDisplacement)
4626 ERROR_HANDLER(InvalidSegment)
4627 ERROR_HANDLER(InvalidImmediate)
4628 ERROR_HANDLER(OperandSizeMismatch)
4629 ERROR_HANDLER(AmbiguousOperandSize)
4630 ERROR_HANDLER(NotConsecutiveRegs)
4631
4632 #undef ERROR_HANDLER
4633
4634 Failed:
4635 #ifndef ASMJIT_NO_LOGGING
4636 return EmitterUtils::logInstructionFailed(this, err, instId, options, o0, o1, o2, opExt);
4637 #else
4638 resetExtraReg();
4639 resetInstOptions();
4640 resetInlineComment();
4641 return reportError(err);
4642 #endif
4643 }
4644
4645 // ============================================================================
4646 // [asmjit::x86::Assembler - Align]
4647 // ============================================================================
4648
align(uint32_t alignMode,uint32_t alignment)4649 Error Assembler::align(uint32_t alignMode, uint32_t alignment) {
4650 if (ASMJIT_UNLIKELY(!_code))
4651 return reportError(DebugUtils::errored(kErrorNotInitialized));
4652
4653 if (ASMJIT_UNLIKELY(alignMode >= kAlignCount))
4654 return reportError(DebugUtils::errored(kErrorInvalidArgument));
4655
4656 if (alignment <= 1)
4657 return kErrorOk;
4658
4659 if (ASMJIT_UNLIKELY(!Support::isPowerOf2(alignment) || alignment > Globals::kMaxAlignment))
4660 return reportError(DebugUtils::errored(kErrorInvalidArgument));
4661
4662 uint32_t i = uint32_t(Support::alignUpDiff<size_t>(offset(), alignment));
4663 if (i > 0) {
4664 CodeBufferWriter writer(this);
4665 ASMJIT_PROPAGATE(writer.ensureSpace(this, i));
4666
4667 uint8_t pattern = 0x00;
4668 switch (alignMode) {
4669 case kAlignCode: {
4670 if (hasEncodingOption(kEncodingOptionOptimizedAlign)) {
4671 // Intel 64 and IA-32 Architectures Software Developer's Manual - Volume 2B (NOP).
4672 enum { kMaxNopSize = 9 };
4673
4674 static const uint8_t nopData[kMaxNopSize][kMaxNopSize] = {
4675 { 0x90 },
4676 { 0x66, 0x90 },
4677 { 0x0F, 0x1F, 0x00 },
4678 { 0x0F, 0x1F, 0x40, 0x00 },
4679 { 0x0F, 0x1F, 0x44, 0x00, 0x00 },
4680 { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 },
4681 { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 },
4682 { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
4683 { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }
4684 };
4685
4686 do {
4687 uint32_t n = Support::min<uint32_t>(i, kMaxNopSize);
4688 const uint8_t* src = nopData[n - 1];
4689
4690 i -= n;
4691 do {
4692 writer.emit8(*src++);
4693 } while (--n);
4694 } while (i);
4695 }
4696
4697 pattern = 0x90;
4698 break;
4699 }
4700
4701 case kAlignData:
4702 pattern = 0xCC;
4703 break;
4704
4705 case kAlignZero:
4706 // Pattern already set to zero.
4707 break;
4708 }
4709
4710 while (i) {
4711 writer.emit8(pattern);
4712 i--;
4713 }
4714
4715 writer.done(this);
4716 }
4717
4718 #ifndef ASMJIT_NO_LOGGING
4719 if (_logger) {
4720 StringTmp<128> sb;
4721 sb.appendChars(' ', _logger->indentation(FormatOptions::kIndentationCode));
4722 sb.appendFormat("align %u\n", alignment);
4723 _logger->log(sb);
4724 }
4725 #endif
4726
4727 return kErrorOk;
4728 }
4729
4730 // ============================================================================
4731 // [asmjit::x86::Assembler - Events]
4732 // ============================================================================
4733
onAttach(CodeHolder * code)4734 Error Assembler::onAttach(CodeHolder* code) noexcept {
4735 uint32_t arch = code->arch();
4736 if (!Environment::isFamilyX86(arch))
4737 return DebugUtils::errored(kErrorInvalidArch);
4738
4739 ASMJIT_PROPAGATE(Base::onAttach(code));
4740
4741 if (Environment::is32Bit(arch)) {
4742 // 32 bit architecture - X86.
4743 _gpRegInfo.setSignature(Gpd::kSignature);
4744 _forcedInstOptions |= Inst::_kOptionInvalidRex;
4745 _setAddressOverrideMask(kX86MemInfo_67H_X86);
4746 }
4747 else {
4748 // 64 bit architecture - X64.
4749 _gpRegInfo.setSignature(Gpq::kSignature);
4750 _forcedInstOptions &= ~Inst::_kOptionInvalidRex;
4751 _setAddressOverrideMask(kX86MemInfo_67H_X64);
4752 }
4753
4754 return kErrorOk;
4755 }
4756
onDetach(CodeHolder * code)4757 Error Assembler::onDetach(CodeHolder* code) noexcept {
4758 _forcedInstOptions &= ~Inst::_kOptionInvalidRex;
4759 _setAddressOverrideMask(0);
4760
4761 return Base::onDetach(code);
4762 }
4763
4764 ASMJIT_END_SUB_NAMESPACE
4765
4766 #endif // ASMJIT_BUILD_X86
4767