1 #pragma once
2 #ifndef XBYAK_XBYAK_H_
3 #define XBYAK_XBYAK_H_
4 /*!
5 @file xbyak.h
6 @brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++
7 @author herumi
8 @url https://github.com/herumi/xbyak
9 @note modified new BSD license
10 http://opensource.org/licenses/BSD-3-Clause
11 */
12 #if (not +0) && !defined(XBYAK_NO_OP_NAMES) // trick to detect whether 'not' is operator or not
13 #define XBYAK_NO_OP_NAMES
14 #endif
15
16 #include <stdio.h> // for debug print
17 #include <assert.h>
18 #include <list>
19 #include <string>
20 #include <algorithm>
21 #ifndef NDEBUG
22 #include <iostream>
23 #endif
24
25 // #define XBYAK_DISABLE_AVX512
26
27 #if !defined(XBYAK_USE_MMAP_ALLOCATOR) && !defined(XBYAK_DONT_USE_MMAP_ALLOCATOR)
28 #define XBYAK_USE_MMAP_ALLOCATOR
29 #endif
30 #if !defined(__GNUC__) || defined(__MINGW32__)
31 #undef XBYAK_USE_MMAP_ALLOCATOR
32 #endif
33
34 #ifdef __GNUC__
35 #define XBYAK_GNUC_PREREQ(major, minor) ((__GNUC__) * 100 + (__GNUC_MINOR__) >= (major) * 100 + (minor))
36 #else
37 #define XBYAK_GNUC_PREREQ(major, minor) 0
38 #endif
39
40 // This covers -std=(gnu|c)++(0x|11|1y), -stdlib=libc++, and modern Microsoft.
41 #if ((defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(_LIBCPP_VERSION) ||\
42 ((__cplusplus >= 201103) || defined(__GXX_EXPERIMENTAL_CXX0X__)))
43 #include <unordered_set>
44 #define XBYAK_STD_UNORDERED_SET std::unordered_set
45 #include <unordered_map>
46 #define XBYAK_STD_UNORDERED_MAP std::unordered_map
47 #define XBYAK_STD_UNORDERED_MULTIMAP std::unordered_multimap
48
49 /*
50 Clang/llvm-gcc and ICC-EDG in 'GCC-mode' always claim to be GCC 4.2, using
51 libstdcxx 20070719 (from GCC 4.2.1, the last GPL 2 version).
52 */
53 #elif XBYAK_GNUC_PREREQ(4, 5) || (XBYAK_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || defined(__llvm__)
54 #include <tr1/unordered_set>
55 #define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
56 #include <tr1/unordered_map>
57 #define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
58 #define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
59
60 #elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (_MSC_VER < 1600)
61 #include <unordered_set>
62 #define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
63 #include <unordered_map>
64 #define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
65 #define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
66
67 #else
68 #include <set>
69 #define XBYAK_STD_UNORDERED_SET std::set
70 #include <map>
71 #define XBYAK_STD_UNORDERED_MAP std::map
72 #define XBYAK_STD_UNORDERED_MULTIMAP std::multimap
73 #endif
74 #ifdef _WIN32
75 #ifndef WIN32_LEAN_AND_MEAN
76 #define WIN32_LEAN_AND_MEAN
77 #endif
78 #include <windows.h>
79 #include <malloc.h>
80 #define XBYAK_TLS __declspec(thread)
81 #elif defined(__GNUC__)
82 #include <unistd.h>
83 #include <sys/mman.h>
84 #include <stdlib.h>
85 #define XBYAK_TLS __thread
86 #endif
87 #if defined(__APPLE__) && !defined(XBYAK_DONT_USE_MAP_JIT)
88 #define XBYAK_USE_MAP_JIT
89 #include <sys/sysctl.h>
90 #ifndef MAP_JIT
91 #define MAP_JIT 0x800
92 #endif
93 #endif
94 #if !defined(_MSC_VER) || (_MSC_VER >= 1600)
95 #include <stdint.h>
96 #endif
97
#if !defined(MFD_CLOEXEC) // defined only on Linux 3.17 or later
99 #undef XBYAK_USE_MEMFD
100 #endif
101
102 #if defined(_WIN64) || defined(__MINGW64__) || (defined(__CYGWIN__) && defined(__x86_64__))
103 #define XBYAK64_WIN
104 #elif defined(__x86_64__)
105 #define XBYAK64_GCC
106 #endif
107 #if !defined(XBYAK64) && !defined(XBYAK32)
108 #if defined(XBYAK64_GCC) || defined(XBYAK64_WIN)
109 #define XBYAK64
110 #else
111 #define XBYAK32
112 #endif
113 #endif
114
115 #if (__cplusplus >= 201103) || (defined(_MSC_VER) && _MSC_VER >= 1800)
116 #undef XBYAK_TLS
117 #define XBYAK_TLS thread_local
118 #define XBYAK_VARIADIC_TEMPLATE
119 #define XBYAK_NOEXCEPT noexcept
120 #else
121 #define XBYAK_NOEXCEPT throw()
122 #endif
123
124 // require c++14 or later
125 // Visual Studio 2017 version 15.0 or later
126 // g++-6 or later
127 #if ((__cplusplus >= 201402L) && !(!defined(__clang__) && defined(__GNUC__) && (__GNUC__ <= 5))) || (defined(_MSC_VER) && _MSC_VER >= 1910)
128 #define XBYAK_CONSTEXPR constexpr
129 #else
130 #define XBYAK_CONSTEXPR
131 #endif
132
133 #ifdef _MSC_VER
134 #pragma warning(push)
135 #pragma warning(disable : 4514) /* remove inline function */
136 #pragma warning(disable : 4786) /* identifier is too long */
137 #pragma warning(disable : 4503) /* name is too long */
#pragma warning(disable : 4127) /* constant expression */
139 #endif
140
141 namespace Xbyak {
142
143 enum {
144 DEFAULT_MAX_CODE_SIZE = 4096,
145 VERSION = 0x6010 /* 0xABCD = A.BC(D) */
146 };
147
148 #ifndef MIE_INTEGER_TYPE_DEFINED
149 #define MIE_INTEGER_TYPE_DEFINED
150 // for backward compatibility
151 typedef uint64_t uint64;
152 typedef int64_t sint64;
153 typedef uint32_t uint32;
154 typedef uint16_t uint16;
155 typedef uint8_t uint8;
156 #endif
157
158 #ifndef MIE_ALIGN
159 #ifdef _MSC_VER
160 #define MIE_ALIGN(x) __declspec(align(x))
161 #else
162 #define MIE_ALIGN(x) __attribute__((aligned(x)))
163 #endif
164 #endif
165 #ifndef MIE_PACK // for shufps
166 #define MIE_PACK(x, y, z, w) ((x) * 64 + (y) * 16 + (z) * 4 + (w))
167 #endif
168
169 enum {
170 ERR_NONE = 0,
171 ERR_BAD_ADDRESSING,
172 ERR_CODE_IS_TOO_BIG,
173 ERR_BAD_SCALE,
174 ERR_ESP_CANT_BE_INDEX,
175 ERR_BAD_COMBINATION,
176 ERR_BAD_SIZE_OF_REGISTER,
177 ERR_IMM_IS_TOO_BIG,
178 ERR_BAD_ALIGN,
179 ERR_LABEL_IS_REDEFINED,
180 ERR_LABEL_IS_TOO_FAR,
181 ERR_LABEL_IS_NOT_FOUND,
182 ERR_CODE_ISNOT_COPYABLE,
183 ERR_BAD_PARAMETER,
184 ERR_CANT_PROTECT,
185 ERR_CANT_USE_64BIT_DISP,
186 ERR_OFFSET_IS_TOO_BIG,
187 ERR_MEM_SIZE_IS_NOT_SPECIFIED,
188 ERR_BAD_MEM_SIZE,
189 ERR_BAD_ST_COMBINATION,
190 ERR_OVER_LOCAL_LABEL, // not used
191 ERR_UNDER_LOCAL_LABEL,
192 ERR_CANT_ALLOC,
193 ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW,
194 ERR_BAD_PROTECT_MODE,
195 ERR_BAD_PNUM,
196 ERR_BAD_TNUM,
197 ERR_BAD_VSIB_ADDRESSING,
198 ERR_CANT_CONVERT,
199 ERR_LABEL_ISNOT_SET_BY_L,
200 ERR_LABEL_IS_ALREADY_SET_BY_L,
201 ERR_BAD_LABEL_STR,
202 ERR_MUNMAP,
203 ERR_OPMASK_IS_ALREADY_SET,
204 ERR_ROUNDING_IS_ALREADY_SET,
205 ERR_K0_IS_INVALID,
206 ERR_EVEX_IS_INVALID,
207 ERR_SAE_IS_INVALID,
208 ERR_ER_IS_INVALID,
209 ERR_INVALID_BROADCAST,
210 ERR_INVALID_OPMASK_WITH_MEMORY,
211 ERR_INVALID_ZERO,
212 ERR_INVALID_RIP_IN_AUTO_GROW,
213 ERR_INVALID_MIB_ADDRESS,
214 ERR_X2APIC_IS_NOT_SUPPORTED,
215 ERR_NOT_SUPPORTED,
216 ERR_SAME_REGS_ARE_INVALID,
217 ERR_INTERNAL // Put it at last.
218 };
219
ConvertErrorToString(int err)220 inline const char *ConvertErrorToString(int err)
221 {
222 static const char *errTbl[] = {
223 "none",
224 "bad addressing",
225 "code is too big",
226 "bad scale",
227 "esp can't be index",
228 "bad combination",
229 "bad size of register",
230 "imm is too big",
231 "bad align",
232 "label is redefined",
233 "label is too far",
234 "label is not found",
235 "code is not copyable",
236 "bad parameter",
237 "can't protect",
238 "can't use 64bit disp(use (void*))",
239 "offset is too big",
240 "MEM size is not specified",
241 "bad mem size",
242 "bad st combination",
243 "over local label",
244 "under local label",
245 "can't alloc",
246 "T_SHORT is not supported in AutoGrow",
247 "bad protect mode",
248 "bad pNum",
249 "bad tNum",
250 "bad vsib addressing",
251 "can't convert",
252 "label is not set by L()",
253 "label is already set by L()",
254 "bad label string",
255 "err munmap",
256 "opmask is already set",
257 "rounding is already set",
258 "k0 is invalid",
259 "evex is invalid",
260 "sae(suppress all exceptions) is invalid",
261 "er(embedded rounding) is invalid",
262 "invalid broadcast",
263 "invalid opmask with memory",
264 "invalid zero",
265 "invalid rip in AutoGrow",
266 "invalid mib address",
267 "x2APIC is not supported",
268 "not supported",
269 "same regs are invalid",
270 "internal error"
271 };
272 assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
273 return err <= ERR_INTERNAL ? errTbl[err] : "unknown err";
274 }
275
#ifdef XBYAK_NO_EXCEPTION
// Exception-free mode: errors are recorded in a thread-local slot and
// queried with GetError()/ClearError() instead of being thrown.
namespace local {

// Reference to the thread-local error slot (0 = no error).
inline int& GetErrorRef() {
	static XBYAK_TLS int err = 0;
	return err;
}

// Record an error code; the first recorded error is kept so later
// follow-on failures do not mask the root cause.
inline void SetError(int err) {
	if (local::GetErrorRef()) return; // keep the first err code
	local::GetErrorRef() = err;
}

} // local

// Reset the stored error code to "no error".
inline void ClearError() {
	local::GetErrorRef() = 0;
}
// Return the first error recorded since the last ClearError() (0 = none).
inline int GetError() { return local::GetErrorRef(); }

// In this mode XBYAK_THROW(_RET) record the error and return instead of throwing.
#define XBYAK_THROW(err) { local::SetError(err); return; }
#define XBYAK_THROW_RET(err, r) { local::SetError(err); return r; }

299 #else
// Exception type thrown by Xbyak when XBYAK_NO_EXCEPTION is not defined.
class Error : public std::exception {
	int err_;
public:
	explicit Error(int err) : err_(err)
	{
		// clamp out-of-range codes so what() stays safe
		if (err_ < 0 || err_ > ERR_INTERNAL) {
			err_ = ERR_INTERNAL;
		}
	}
	// implicit conversion to the raw error code for comparisons
	operator int() const { return err_; }
	const char *what() const XBYAK_NOEXCEPT
	{
		return ConvertErrorToString(err_);
	}
};

// dummy functions; API-compatible with the XBYAK_NO_EXCEPTION versions above
inline void ClearError() { }
inline int GetError() { return 0; }

// overload so ConvertErrorToString accepts both int and Error
inline const char *ConvertErrorToString(const Error& err)
{
	return err.what();
}

#define XBYAK_THROW(err) { throw Error(err); }
#define XBYAK_THROW_RET(err, r) { throw Error(err); }

#endif
329
/*
	Allocate size bytes aligned to alignment via the platform allocator.
	Returns 0 on failure; release the result with AlignedFree().
*/
inline void *AlignedMalloc(size_t size, size_t alignment)
{
#ifdef __MINGW32__
	return __mingw_aligned_malloc(size, alignment);
#elif defined(_WIN32)
	return _aligned_malloc(size, alignment);
#else
	void *p = 0;
	if (posix_memalign(&p, alignment, size) != 0) return 0;
	return p;
#endif
}
342
// Release memory obtained from AlignedMalloc() using the matching
// platform-specific deallocator.
// NOTE(review): AlignedMalloc keys its Windows branch on _WIN32 but this
// keys on _MSC_VER — mismatched on a non-MSVC, non-MinGW Win32 toolchain;
// confirm this is intentional.
inline void AlignedFree(void *p)
{
#ifdef __MINGW32__
	__mingw_aligned_free(p);
#elif defined(_MSC_VER)
	_aligned_free(p);
#else
	free(p);
#endif
}
353
// Force-cast p to To by going through size_t; the double cast permits
// conversions (e.g. pointer <-> integer, between unrelated pointer types)
// that a single static_cast would reject.
template<class To, class From>
inline const To CastTo(From p) XBYAK_NOEXCEPT
{
	return (const To)(size_t)(p);
}
359 namespace inner {
360
361 static const size_t ALIGN_PAGE_SIZE = 4096;
362
// True if x, reinterpreted as a signed 32-bit value, fits in a signed 8-bit displacement [-128, 127].
inline bool IsInDisp8(uint32_t x) { return (x <= 0x7F) || (0xFFFFFF80 <= x); }
// True if x, reinterpreted as a signed 64-bit value, fits in the signed 32-bit range.
inline bool IsInInt32(uint64_t x) { return (x <= 0x7FFFFFFFU) || (~uint64_t(0x7fffffffu) <= x); }
365
// Narrow x to 32 bits; on 64-bit builds raise ERR_OFFSET_IS_TOO_BIG when the
// value does not fit in a signed 32-bit displacement (on 32-bit builds the
// truncation is always safe, so no check is done).
inline uint32_t VerifyInInt32(uint64_t x)
{
#ifdef XBYAK64
	if (!IsInInt32(x)) XBYAK_THROW_RET(ERR_OFFSET_IS_TOO_BIG, 0)
#endif
	return static_cast<uint32_t>(x);
}
373
374 enum LabelMode {
375 LasIs, // as is
376 Labs, // absolute
377 LaddTop // (addr + top) for mov(reg, label) with AutoGrow
378 };
379
380 } // inner
381
382 /*
383 custom allocator
384 */
// Default code-buffer allocator; derive from it to customize memory management.
struct Allocator {
	// return a page-aligned buffer of size bytes (0 on failure)
	virtual uint8_t *alloc(size_t size) { return reinterpret_cast<uint8_t*>(AlignedMalloc(size, inner::ALIGN_PAGE_SIZE)); }
	// release a buffer previously returned by alloc()
	virtual void free(uint8_t *p) { AlignedFree(p); }
	virtual ~Allocator() {}
	/* override to return false if you call protect() manually */
	virtual bool useProtect() const { return true; }
};
392
393 #ifdef XBYAK_USE_MMAP_ALLOCATOR
394 #ifdef XBYAK_USE_MAP_JIT
395 namespace util {
396
// Query the Darwin kernel release string ("kern.osrelease", e.g. "18.2.0")
// and return its leading major number; returns 0 on sysctl or parse failure.
inline int getMacOsVersionPure()
{
	char buf[64];
	size_t size = sizeof(buf);
	int err = sysctlbyname("kern.osrelease", buf, &size, NULL, 0);
	if (err != 0) return 0;
	char *endp;
	int major = strtol(buf, &endp, 10);
	// a valid release string has a '.' right after the major number
	if (*endp != '.') return 0;
	return major;
}
408
// Cached Darwin major version; computed once on first call.
inline int getMacOsVersion()
{
	static const int version = getMacOsVersionPure();
	return version;
}
414
415 } // util
416 #endif
417 class MmapAllocator : Allocator {
418 typedef XBYAK_STD_UNORDERED_MAP<uintptr_t, size_t> SizeList;
419 SizeList sizeList_;
420 public:
alloc(size_t size)421 uint8_t *alloc(size_t size)
422 {
423 const size_t alignedSizeM1 = inner::ALIGN_PAGE_SIZE - 1;
424 size = (size + alignedSizeM1) & ~alignedSizeM1;
425 #if defined(MAP_ANONYMOUS)
426 int mode = MAP_PRIVATE | MAP_ANONYMOUS;
427 #elif defined(MAP_ANON)
428 int mode = MAP_PRIVATE | MAP_ANON;
429 #else
430 #error "not supported"
431 #endif
432 #if defined(XBYAK_USE_MAP_JIT)
433 const int mojaveVersion = 18;
434 if (util::getMacOsVersion() >= mojaveVersion) mode |= MAP_JIT;
435 #endif
436 int fd = -1;
437 #if defined(XBYAK_USE_MEMFD)
438 fd = memfd_create("xbyak", MFD_CLOEXEC);
439 if (fd != -1) {
440 mode = MAP_SHARED;
441 if (ftruncate(fd, size) != 0) XBYAK_THROW_RET(ERR_CANT_ALLOC, 0)
442 }
443 #endif
444 void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, mode, fd, 0);
445 #if defined(XBYAK_USE_MEMFD)
446 if (fd != -1) close(fd);
447 #endif
448 if (p == MAP_FAILED) XBYAK_THROW_RET(ERR_CANT_ALLOC, 0)
449 assert(p);
450 sizeList_[(uintptr_t)p] = size;
451 return (uint8_t*)p;
452 }
free(uint8_t * p)453 void free(uint8_t *p)
454 {
455 if (p == 0) return;
456 SizeList::iterator i = sizeList_.find((uintptr_t)p);
457 if (i == sizeList_.end()) XBYAK_THROW(ERR_BAD_PARAMETER)
458 if (munmap((void*)i->first, i->second) < 0) XBYAK_THROW(ERR_MUNMAP)
459 sizeList_.erase(i);
460 }
461 };
462 #endif
463
464 class Address;
465 class Reg;
466
// Base class of every operand (a register of some kind, or memory).
// State is packed into bitfields: register index, kind, width in bits,
// plus the EVEX attributes (zeroing, opmask index, rounding mode).
class Operand {
	static const uint8_t EXT8BIT = 0x20; // flag in idx_ marking spl/bpl/sil/dil
	unsigned int idx_:6; // 0..31 + EXT8BIT = 1 if spl/bpl/sil/dil
	unsigned int kind_:10; // bitmask of Kind values
	unsigned int bit_:14; // operand width in bits; power of two, up to 8192
protected:
	unsigned int zero_:1; // EVEX zeroing-masking flag
	unsigned int mask_:3; // EVEX opmask register index (0 = none)
	unsigned int rounding_:3; // EVEX rounding/SAE mode (0 = none)
	void setIdx(int idx) { idx_ = idx; }
public:
	enum Kind {
		NONE = 0,
		MEM = 1 << 0,
		REG = 1 << 1,
		MMX = 1 << 2,
		FPU = 1 << 3,
		XMM = 1 << 4,
		YMM = 1 << 5,
		ZMM = 1 << 6,
		OPMASK = 1 << 7,
		BNDREG = 1 << 8,
		TMM = 1 << 9
	};
	// Register encodings; aliases share indices across widths (e.g. R8D = 8).
	enum Code {
#ifdef XBYAK64
		RAX = 0, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15,
		R8D = 8, R9D, R10D, R11D, R12D, R13D, R14D, R15D,
		R8W = 8, R9W, R10W, R11W, R12W, R13W, R14W, R15W,
		R8B = 8, R9B, R10B, R11B, R12B, R13B, R14B, R15B,
		SPL = 4, BPL, SIL, DIL,
#endif
		EAX = 0, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
		AX = 0, CX, DX, BX, SP, BP, SI, DI,
		AL = 0, CL, DL, BL, AH, CH, DH, BH
	};
	XBYAK_CONSTEXPR Operand() : idx_(0), kind_(0), bit_(0), zero_(0), mask_(0), rounding_(0) { }
	XBYAK_CONSTEXPR Operand(int idx, Kind kind, int bit, bool ext8bit = 0)
		: idx_(static_cast<uint8_t>(idx | (ext8bit ? EXT8BIT : 0)))
		, kind_(kind)
		, bit_(bit)
		, zero_(0), mask_(0), rounding_(0)
	{
		assert((bit_ & (bit_ - 1)) == 0); // bit must be power of two
	}
	XBYAK_CONSTEXPR Kind getKind() const { return static_cast<Kind>(kind_); }
	// register index with the EXT8BIT flag stripped
	XBYAK_CONSTEXPR int getIdx() const { return idx_ & (EXT8BIT - 1); }
	XBYAK_CONSTEXPR bool isNone() const { return kind_ == 0; }
	XBYAK_CONSTEXPR bool isMMX() const { return is(MMX); }
	XBYAK_CONSTEXPR bool isXMM() const { return is(XMM); }
	XBYAK_CONSTEXPR bool isYMM() const { return is(YMM); }
	XBYAK_CONSTEXPR bool isZMM() const { return is(ZMM); }
	XBYAK_CONSTEXPR bool isTMM() const { return is(TMM); }
	// *MEM variants also accept a memory operand of that slot
	XBYAK_CONSTEXPR bool isXMEM() const { return is(XMM | MEM); }
	XBYAK_CONSTEXPR bool isYMEM() const { return is(YMM | MEM); }
	XBYAK_CONSTEXPR bool isZMEM() const { return is(ZMM | MEM); }
	XBYAK_CONSTEXPR bool isOPMASK() const { return is(OPMASK); }
	XBYAK_CONSTEXPR bool isBNDREG() const { return is(BNDREG); }
	XBYAK_CONSTEXPR bool isREG(int bit = 0) const { return is(REG, bit); }
	XBYAK_CONSTEXPR bool isMEM(int bit = 0) const { return is(MEM, bit); }
	XBYAK_CONSTEXPR bool isFPU() const { return is(FPU); }
	XBYAK_CONSTEXPR bool isExt8bit() const { return (idx_ & EXT8BIT) != 0; }
	XBYAK_CONSTEXPR bool isExtIdx() const { return (getIdx() & 8) != 0; }
	XBYAK_CONSTEXPR bool isExtIdx2() const { return (getIdx() & 16) != 0; }
	// operand requires an EVEX prefix (zmm, reg index >= 16, opmask or rounding set)
	XBYAK_CONSTEXPR bool hasEvex() const { return isZMM() || isExtIdx2() || getOpmaskIdx() || getRounding(); }
	// operand requires a REX prefix
	XBYAK_CONSTEXPR bool hasRex() const { return isExt8bit() || isREG(64) || isExtIdx(); }
	XBYAK_CONSTEXPR bool hasZero() const { return zero_; }
	XBYAK_CONSTEXPR int getOpmaskIdx() const { return mask_; }
	XBYAK_CONSTEXPR int getRounding() const { return rounding_; }
	// change the SIMD kind (XMM/YMM/ZMM/TMM) and the width that goes with it;
	// other kinds are ignored
	void setKind(Kind kind)
	{
		if ((kind & (XMM|YMM|ZMM|TMM)) == 0) return;
		kind_ = kind;
		bit_ = kind == XMM ? 128 : kind == YMM ? 256 : kind == ZMM ? 512 : 8192;
	}
	// err if MMX/FPU/OPMASK/BNDREG
	void setBit(int bit);
	// attach an opmask register; raises if one is already set
	void setOpmaskIdx(int idx, bool /*ignore_idx0*/ = true)
	{
		if (mask_) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET)
		mask_ = idx;
	}
	// attach a rounding/SAE mode; raises if one is already set
	void setRounding(int idx)
	{
		if (rounding_) XBYAK_THROW(ERR_ROUNDING_IS_ALREADY_SET)
		rounding_ = idx;
	}
	void setZero() { zero_ = true; }
	// ah, ch, dh, bh?
	bool isHigh8bit() const
	{
		if (!isBit(8)) return false;
		if (isExt8bit()) return false; // spl/bpl/sil/dil are not high-byte regs
		const int idx = getIdx();
		return AH <= idx && idx <= BH;
	}
	// any bit is acceptable if bit == 0
	XBYAK_CONSTEXPR bool is(int kind, uint32_t bit = 0) const
	{
		return (kind == 0 || (kind_ & kind)) && (bit == 0 || (bit_ & bit)); // cf. you can set (8|16)
	}
	XBYAK_CONSTEXPR bool isBit(uint32_t bit) const { return (bit_ & bit) != 0; }
	XBYAK_CONSTEXPR uint32_t getBit() const { return bit_; }
	// printable register name; raises ERR_INTERNAL if the operand has no name
	const char *toString() const
	{
		const int idx = getIdx();
		if (kind_ == REG) {
			if (isExt8bit()) {
				static const char *tbl[4] = { "spl", "bpl", "sil", "dil" };
				return tbl[idx - 4];
			}
			static const char *tbl[4][16] = {
				{ "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh", "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" },
				{ "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w" },
				{ "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" },
				{ "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" },
			};
			return tbl[bit_ == 8 ? 0 : bit_ == 16 ? 1 : bit_ == 32 ? 2 : 3][idx];
		} else if (isOPMASK()) {
			static const char *tbl[8] = { "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7" };
			return tbl[idx];
		} else if (isTMM()) {
			static const char *tbl[8] = {
				"tmm0", "tmm1", "tmm2", "tmm3", "tmm4", "tmm5", "tmm6", "tmm7"
			};
			return tbl[idx];
		} else if (isZMM()) {
			static const char *tbl[32] = {
				"zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15",
				"zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31"
			};
			return tbl[idx];
		} else if (isYMM()) {
			static const char *tbl[32] = {
				"ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15",
				"ymm16", "ymm17", "ymm18", "ymm19", "ymm20", "ymm21", "ymm22", "ymm23", "ymm24", "ymm25", "ymm26", "ymm27", "ymm28", "ymm29", "ymm30", "ymm31"
			};
			return tbl[idx];
		} else if (isXMM()) {
			static const char *tbl[32] = {
				"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
				"xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23", "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31"
			};
			return tbl[idx];
		} else if (isMMX()) {
			static const char *tbl[8] = { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" };
			return tbl[idx];
		} else if (isFPU()) {
			static const char *tbl[8] = { "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7" };
			return tbl[idx];
		} else if (isBNDREG()) {
			static const char *tbl[4] = { "bnd0", "bnd1", "bnd2", "bnd3" };
			return tbl[idx];
		}
		XBYAK_THROW_RET(ERR_INTERNAL, 0);
	}
	// member-wise equality on Operand's own state only
	bool isEqualIfNotInherited(const Operand& rhs) const { return idx_ == rhs.idx_ && kind_ == rhs.kind_ && bit_ == rhs.bit_ && zero_ == rhs.zero_ && mask_ == rhs.mask_ && rounding_ == rhs.rounding_; }
	bool operator==(const Operand& rhs) const;
	bool operator!=(const Operand& rhs) const { return !operator==(rhs); }
	// downcasts, valid only for the matching operand category (defined later)
	const Address& getAddress() const;
	const Reg& getReg() const;
};
629
// Change the width of this operand in place (e.g. 32-bit reg -> 16-bit reg,
// or reg -> XMM/YMM/ZMM/TMM for widths >= 128). Raises ERR_CANT_CONVERT for
// impossible conversions (unsupported width, ah/ch/dh/bh, index out of range).
inline void Operand::setBit(int bit)
{
	if (bit != 8 && bit != 16 && bit != 32 && bit != 64 && bit != 128 && bit != 256 && bit != 512 && bit != 8192) goto ERR;
	if (isBit(bit)) return; // already the requested width
	if (is(MEM | OPMASK)) {
		// memory/opmask operands only record the new width
		bit_ = bit;
		return;
	}
	if (is(REG | XMM | YMM | ZMM | TMM)) {
		int idx = getIdx();
		// err if converting ah, bh, ch, dh
		if (isREG(8) && (4 <= idx && idx < 8) && !isExt8bit()) goto ERR;
		Kind kind = REG;
		switch (bit) {
		case 8:
			if (idx >= 16) goto ERR;
#ifdef XBYAK32
			if (idx >= 4) goto ERR;
#else
			// indices 4..7 as 8-bit registers mean spl/bpl/sil/dil (REX form)
			if (4 <= idx && idx < 8) idx |= EXT8BIT;
#endif
			break;
		case 16:
		case 32:
		case 64:
			if (idx >= 16) goto ERR;
			break;
		case 128: kind = XMM; break;
		case 256: kind = YMM; break;
		case 512: kind = ZMM; break;
		case 8192: kind = TMM; break;
		}
		idx_ = idx;
		kind_ = kind;
		bit_ = bit;
		if (bit >= 128) return; // keep mask_ and rounding_
		mask_ = 0;
		rounding_ = 0;
		return;
	}
ERR:
	XBYAK_THROW(ERR_CANT_CONVERT)
}
673
674 class Label;
675
676 struct Reg8;
677 struct Reg16;
678 struct Reg32;
679 #ifdef XBYAK64
680 struct Reg64;
681 #endif
// A concrete register operand; also the base class of all typed register
// families (Reg8/16/32/64, Mmx/Xmm/..., Opmask, etc.).
class Reg : public Operand {
public:
	XBYAK_CONSTEXPR Reg() { }
	XBYAK_CONSTEXPR Reg(int idx, Kind kind, int bit = 0, bool ext8bit = false) : Operand(idx, kind, bit, ext8bit) { }
	// convert to Reg8/Reg16/Reg32/Reg64/XMM/YMM/ZMM
	Reg changeBit(int bit) const { Reg r(*this); r.setBit(bit); return r; }
	// individual REX prefix bit contributions of this register
	uint8_t getRexW() const { return isREG(64) ? 8 : 0; }
	uint8_t getRexR() const { return isExtIdx() ? 4 : 0; }
	uint8_t getRexX() const { return isExtIdx() ? 2 : 0; }
	uint8_t getRexB() const { return isExtIdx() ? 1 : 0; }
	// combined REX byte for this register together with base; 0x40 is added
	// whenever any REX bit is set or a spl/bpl/sil/dil register is involved
	uint8_t getRex(const Reg& base = Reg()) const
	{
		uint8_t rex = getRexW() | getRexR() | base.getRexW() | base.getRexB();
		if (rex || isExt8bit() || base.isExt8bit()) rex |= 0x40;
		return rex;
	}
	Reg8 cvt8() const;
	Reg16 cvt16() const;
	Reg32 cvt32() const;
#ifdef XBYAK64
	Reg64 cvt64() const;
#endif
};
705
// Downcast to Reg; the caller must ensure this operand is not memory.
inline const Reg& Operand::getReg() const
{
	assert(!isMEM());
	return static_cast<const Reg&>(*this);
}
711
// 8-bit GPR; ext8bit selects spl/bpl/sil/dil (REX form) for indices 4..7.
struct Reg8 : public Reg {
	explicit XBYAK_CONSTEXPR Reg8(int idx = 0, bool ext8bit = false) : Reg(idx, Operand::REG, 8, ext8bit) { }
};

// 16-bit GPR.
struct Reg16 : public Reg {
	explicit XBYAK_CONSTEXPR Reg16(int idx = 0) : Reg(idx, Operand::REG, 16) { }
};

// MMX register mm0-mm7; also the base class of the XMM family.
struct Mmx : public Reg {
	explicit XBYAK_CONSTEXPR Mmx(int idx = 0, Kind kind = Operand::MMX, int bit = 64) : Reg(idx, kind, bit) { }
};
723
// Tag type that selects an EVEX embedded-rounding / SAE mode via operator|.
struct EvexModifierRounding {
	enum {
		T_RN_SAE = 1, // round to nearest
		T_RD_SAE = 2, // round down
		T_RU_SAE = 3, // round up
		T_RZ_SAE = 4, // round toward zero
		T_SAE = 5 // suppress all exceptions
	};
	explicit XBYAK_CONSTEXPR EvexModifierRounding(int rounding) : rounding(rounding) {}
	int rounding;
};
// Tag type that selects EVEX zeroing-masking (T_z) via operator|.
struct EvexModifierZero{ XBYAK_CONSTEXPR EvexModifierZero() {}};
736
// SSE register xmm0-xmm31; base class of Ymm/Zmm.
struct Xmm : public Mmx {
	explicit XBYAK_CONSTEXPR Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { }
	// construct from an explicit kind; the width follows the kind
	XBYAK_CONSTEXPR Xmm(Kind kind, int idx) : Mmx(idx, kind, kind == XMM ? 128 : kind == YMM ? 256 : 512) { }
	// attach an EVEX rounding/SAE mode to a copy of this register
	Xmm operator|(const EvexModifierRounding& emr) const { Xmm r(*this); r.setRounding(emr.rounding); return r; }
	Xmm copyAndSetIdx(int idx) const { Xmm ret(*this); ret.setIdx(idx); return ret; }
	Xmm copyAndSetKind(Operand::Kind kind) const { Xmm ret(*this); ret.setKind(kind); return ret; }
};

// AVX register ymm0-ymm31.
struct Ymm : public Xmm {
	explicit XBYAK_CONSTEXPR Ymm(int idx = 0, Kind kind = Operand::YMM, int bit = 256) : Xmm(idx, kind, bit) { }
	Ymm operator|(const EvexModifierRounding& emr) const { Ymm r(*this); r.setRounding(emr.rounding); return r; }
};

// AVX-512 register zmm0-zmm31.
struct Zmm : public Ymm {
	explicit XBYAK_CONSTEXPR Zmm(int idx = 0) : Ymm(idx, Operand::ZMM, 512) { }
	Zmm operator|(const EvexModifierRounding& emr) const { Zmm r(*this); r.setRounding(emr.rounding); return r; }
};
754
#ifdef XBYAK64
// AMX tile register tmm0-tmm7 (8192 bits).
struct Tmm : public Reg {
	explicit XBYAK_CONSTEXPR Tmm(int idx = 0, Kind kind = Operand::TMM, int bit = 8192) : Reg(idx, kind, bit) { }
};
#endif

// AVX-512 opmask register k0-k7.
struct Opmask : public Reg {
	explicit XBYAK_CONSTEXPR Opmask(int idx = 0) : Reg(idx, Operand::OPMASK, 64) {}
};

// MPX bounds register bnd0-bnd3.
struct BoundsReg : public Reg {
	explicit XBYAK_CONSTEXPR BoundsReg(int idx = 0) : Reg(idx, Operand::BNDREG, 128) {}
};

// x|k, x|T_z, x|rounding: return a copy of x with the EVEX attribute attached.
template<class T>T operator|(const T& x, const Opmask& k) { T r(x); r.setOpmaskIdx(k.getIdx()); return r; }
template<class T>T operator|(const T& x, const EvexModifierZero&) { T r(x); r.setZero(); return r; }
template<class T>T operator|(const T& x, const EvexModifierRounding& emr) { T r(x); r.setRounding(emr.rounding); return r; }

// x87 FPU stack register st0-st7.
struct Fpu : public Reg {
	explicit XBYAK_CONSTEXPR Fpu(int idx = 0) : Reg(idx, Operand::FPU, 32) { }
};
776
// Common base of Reg32/Reg64 (registers usable as address base/index).
struct Reg32e : public Reg {
	explicit XBYAK_CONSTEXPR Reg32e(int idx, int bit) : Reg(idx, Operand::REG, bit) {}
};
// 32-bit GPR.
struct Reg32 : public Reg32e {
	explicit XBYAK_CONSTEXPR Reg32(int idx = 0) : Reg32e(idx, 32) {}
};
783 #ifdef XBYAK64
// 64-bit GPR.
struct Reg64 : public Reg32e {
	explicit XBYAK_CONSTEXPR Reg64(int idx = 0) : Reg32e(idx, 64) {}
};
// rip-relative addressing expression: rip + disp, rip + Label, or rip + addr.
// At most one of label_ / isAddr_ may be active at a time.
struct RegRip {
	int64_t disp_;
	const Label* label_;
	bool isAddr_; // true when disp_ holds an absolute address
	explicit XBYAK_CONSTEXPR RegRip(int64_t disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
	friend const RegRip operator+(const RegRip& r, int disp) {
		return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
	}
	friend const RegRip operator-(const RegRip& r, int disp) {
		return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
	}
	friend const RegRip operator+(const RegRip& r, int64_t disp) {
		return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
	}
	friend const RegRip operator-(const RegRip& r, int64_t disp) {
		return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
	}
	friend const RegRip operator+(const RegRip& r, const Label& label) {
		// a label or an absolute address may be attached only once
		if (r.label_ || r.isAddr_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegRip());
		return RegRip(r.disp_, &label);
	}
	friend const RegRip operator+(const RegRip& r, const void *addr) {
		if (r.label_ || r.isAddr_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegRip());
		return RegRip(r.disp_ + (int64_t)addr, 0, true);
	}
};
813 #endif
814
// Width-conversion helpers; each raises ERR_CANT_CONVERT (via setBit) when
// this register has no counterpart of the requested width.
inline Reg8 Reg::cvt8() const
{
	// keep the EXT8BIT flag so rsp..rdi map to spl..dil, not ah..bh
	Reg r = changeBit(8); return Reg8(r.getIdx(), r.isExt8bit());
}

inline Reg16 Reg::cvt16() const
{
	return Reg16(changeBit(16).getIdx());
}

inline Reg32 Reg::cvt32() const
{
	return Reg32(changeBit(32).getIdx());
}

#ifdef XBYAK64
inline Reg64 Reg::cvt64() const
{
	return Reg64(changeBit(64).getIdx());
}
#endif
836
837 #ifndef XBYAK_DISABLE_SEGMENT
838 // not derived from Reg
// Segment register selector (es/cs/ss/ds/fs/gs); holds only the index.
class Segment {
	int idx_;
public:
	enum {
		es, cs, ss, ds, fs, gs
	};
	explicit XBYAK_CONSTEXPR Segment(int idx) : idx_(idx) { assert(0 <= idx_ && idx_ < 6); }
	int getIdx() const { return idx_; }
	// printable segment register name
	const char *toString() const
	{
		static const char tbl[][3] = {
			"es", "cs", "ss", "ds", "fs", "gs"
		};
		return tbl[idx_];
	}
};
855 #endif
856
// Addressing expression [base_ + index_ * scale_ + disp_], built up via
// operator overloading on registers (e.g. rax + rcx * 4 + 8).
class RegExp {
public:
#ifdef XBYAK64
	enum { i32e = 32 | 64 }; // register widths usable for addressing
#else
	enum { i32e = 32 };
#endif
	XBYAK_CONSTEXPR RegExp(size_t disp = 0) : scale_(0), disp_(disp) { }
	// single-register expression; scale != 1 forces r into the index slot
	XBYAK_CONSTEXPR RegExp(const Reg& r, int scale = 1)
		: scale_(scale)
		, disp_(0)
	{
		if (!r.isREG(i32e) && !r.is(Reg::XMM|Reg::YMM|Reg::ZMM|Reg::TMM)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
		if (scale == 0) return;
		if (scale != 1 && scale != 2 && scale != 4 && scale != 8) XBYAK_THROW(ERR_BAD_SCALE)
		if (r.getBit() >= 128 || scale != 1) { // xmm/ymm is always index
			index_ = r;
		} else {
			base_ = r;
		}
	}
	// true when the index register is a vector register (VSIB addressing)
	bool isVsib(int bit = 128 | 256 | 512) const { return index_.isBit(bit); }
	// return an equivalent expression with a cheaper encoding
	RegExp optimize() const
	{
		RegExp exp = *this;
		// [reg * 2] => [reg + reg]
		if (index_.isBit(i32e) && !base_.getBit() && scale_ == 2) {
			exp.base_ = index_;
			exp.scale_ = 1;
		}
		return exp;
	}
	bool operator==(const RegExp& rhs) const
	{
		return base_ == rhs.base_ && index_ == rhs.index_ && disp_ == rhs.disp_ && scale_ == rhs.scale_;
	}
	const Reg& getBase() const { return base_; }
	const Reg& getIndex() const { return index_; }
	int getScale() const { return scale_; }
	size_t getDisp() const { return disp_; }
	// raise on combinations that cannot be encoded
	XBYAK_CONSTEXPR void verify() const
	{
		if (base_.getBit() >= 128) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
		if (index_.getBit() && index_.getBit() <= 64) {
			if (index_.getIdx() == Operand::ESP) XBYAK_THROW(ERR_ESP_CANT_BE_INDEX)
			if (base_.getBit() && base_.getBit() != index_.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
		}
	}
	friend RegExp operator+(const RegExp& a, const RegExp& b);
	friend RegExp operator-(const RegExp& e, size_t disp);
	// combined REX byte contribution of base and index (0 when none needed)
	uint8_t getRex() const
	{
		uint8_t rex = index_.getRexX() | base_.getRexB();
		return rex ? uint8_t(rex | 0x40) : 0;
	}
private:
	/*
	[base_ + index_ * scale_ + disp_]
	base : Reg32e, index : Reg32e(w/o esp), Xmm, Ymm
	*/
	Reg base_;
	Reg index_;
	int scale_;
	size_t disp_;
};
922
// Combine two address expressions; raises ERR_BAD_ADDRESSING when the sum
// would need more than one base or one index register.
inline RegExp operator+(const RegExp& a, const RegExp& b)
{
	if (a.index_.getBit() && b.index_.getBit()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp())
	RegExp ret = a;
	if (!ret.index_.getBit()) { ret.index_ = b.index_; ret.scale_ = b.scale_; }
	if (b.base_.getBit()) {
		if (ret.base_.getBit()) {
			if (ret.index_.getBit()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp())
			// base + base => base + index * 1
			ret.index_ = b.base_;
			// [reg + esp] => [esp + reg]
			if (ret.index_.getIdx() == Operand::ESP) std::swap(ret.base_, ret.index_);
			ret.scale_ = 1;
		} else {
			ret.base_ = b.base_;
		}
	}
	ret.disp_ += b.disp_;
	return ret;
}
// reg * scale (either operand order) builds a scaled-index expression
inline RegExp operator*(const Reg& r, int scale)
{
	return RegExp(r, scale);
}
inline RegExp operator*(int scale, const Reg& r)
{
	return r * scale;
}
// subtract a displacement from an expression
inline RegExp operator-(const RegExp& e, size_t disp)
{
	RegExp ret = e;
	ret.disp_ -= disp;
	return ret;
}
957
958 // 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc)
959 void *const AutoGrow = (void*)1; //-V566
960 void *const DontSetProtectRWE = (void*)2; //-V566
961
// Byte buffer holding the generated machine code.
// Depending on the constructor arguments the buffer is user supplied
// (USER_BUF), allocated once and page-protected (ALLOC_BUF), or grown by
// reallocation on demand (AUTO_GROW).
class CodeArray {
	enum Type {
		USER_BUF = 1, // use userPtr(non alignment, non protect)
		ALLOC_BUF, // use new(alignment, protect)
		AUTO_GROW // automatically move and grow memory if necessary
	};
	CodeArray(const CodeArray& rhs); // non-copyable
	void operator=(const CodeArray&); // non-assignable
	bool isAllocType() const { return type_ == ALLOC_BUF || type_ == AUTO_GROW; }
	// One pending displacement fixup, applied by calcJmpAddress() once the
	// final buffer address is known (AUTO_GROW may move the buffer).
	struct AddrInfo {
		size_t codeOffset; // position to write
		size_t jmpAddr; // value to write
		int jmpSize; // size of jmpAddr
		inner::LabelMode mode;
		AddrInfo(size_t _codeOffset, size_t _jmpAddr, int _jmpSize, inner::LabelMode _mode)
			: codeOffset(_codeOffset), jmpAddr(_jmpAddr), jmpSize(_jmpSize), mode(_mode) {}
		// Value to store at codeOffset given the final buffer start `top`.
		uint64_t getVal(const uint8_t *top) const
		{
			uint64_t disp = (mode == inner::LaddTop) ? jmpAddr + size_t(top) : (mode == inner::LasIs) ? jmpAddr : jmpAddr - size_t(top);
			if (jmpSize == 4) disp = inner::VerifyInInt32(disp);
			return disp;
		}
	};
	typedef std::list<AddrInfo> AddrInfoList;
	AddrInfoList addrInfoList_;
	const Type type_;
#ifdef XBYAK_USE_MMAP_ALLOCATOR
	MmapAllocator defaultAllocator_;
#else
	Allocator defaultAllocator_;
#endif
	Allocator *alloc_;
protected:
	size_t maxSize_; // capacity of top_[]
	uint8_t *top_; // start of the code buffer
	size_t size_; // number of bytes emitted so far
	bool isCalledCalcJmpAddress_;

	bool useProtect() const { return alloc_->useProtect(); }
	/*
		allocate new memory and copy old data to the new area
	*/
	void growMemory()
	{
		const size_t newSize = (std::max<size_t>)(DEFAULT_MAX_CODE_SIZE, maxSize_ * 2);
		uint8_t *newTop = alloc_->alloc(newSize);
		if (newTop == 0) XBYAK_THROW(ERR_CANT_ALLOC)
		for (size_t i = 0; i < size_; i++) newTop[i] = top_[i];
		alloc_->free(top_);
		top_ = newTop;
		maxSize_ = newSize;
	}
	/*
		calc jmp address for AutoGrow mode
	*/
	void calcJmpAddress()
	{
		if (isCalledCalcJmpAddress_) return; // idempotent
		for (AddrInfoList::const_iterator i = addrInfoList_.begin(), ie = addrInfoList_.end(); i != ie; ++i) {
			uint64_t disp = i->getVal(top_);
			rewrite(i->codeOffset, disp, i->jmpSize);
		}
		isCalledCalcJmpAddress_ = true;
	}
public:
	enum ProtectMode {
		PROTECT_RW = 0, // read/write
		PROTECT_RWE = 1, // read/write/exec
		PROTECT_RE = 2 // read/exec
	};
	/*
		@param maxSize [in] capacity in bytes
		@param userPtr [in] user buffer, AutoGrow, DontSetProtectRWE or 0
		@param allocator [in] custom allocator (0 = default)
	*/
	explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0)
		: type_(userPtr == AutoGrow ? AUTO_GROW : (userPtr == 0 || userPtr == DontSetProtectRWE) ? ALLOC_BUF : USER_BUF)
		, alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_)
		, maxSize_(maxSize)
		, top_(type_ == USER_BUF ? reinterpret_cast<uint8_t*>(userPtr) : alloc_->alloc((std::max<size_t>)(maxSize, 1)))
		, size_(0)
		, isCalledCalcJmpAddress_(false)
	{
		if (maxSize_ > 0 && top_ == 0) XBYAK_THROW(ERR_CANT_ALLOC)
		if ((type_ == ALLOC_BUF && userPtr != DontSetProtectRWE && useProtect()) && !setProtectMode(PROTECT_RWE, false)) {
			alloc_->free(top_);
			XBYAK_THROW(ERR_CANT_PROTECT)
		}
	}
	virtual ~CodeArray()
	{
		if (isAllocType()) {
			if (useProtect()) setProtectModeRW(false); // restore RW before freeing
			alloc_->free(top_);
		}
	}
	// Change protection of the whole buffer; returns false (or throws) on failure.
	bool setProtectMode(ProtectMode mode, bool throwException = true)
	{
		bool isOK = protect(top_, maxSize_, mode);
		if (isOK) return true;
		if (throwException) XBYAK_THROW_RET(ERR_CANT_PROTECT, false)
		return false;
	}
	bool setProtectModeRE(bool throwException = true) { return setProtectMode(PROTECT_RE, throwException); }
	bool setProtectModeRW(bool throwException = true) { return setProtectMode(PROTECT_RW, throwException); }
	// Discard all emitted code and pending fixups (capacity is kept).
	void resetSize()
	{
		size_ = 0;
		addrInfoList_.clear();
		isCalledCalcJmpAddress_ = false;
	}
	// Append one byte; grows the buffer in AUTO_GROW mode, throws otherwise when full.
	void db(int code)
	{
		if (size_ >= maxSize_) {
			if (type_ == AUTO_GROW) {
				growMemory();
			} else {
				XBYAK_THROW(ERR_CODE_IS_TOO_BIG)
			}
		}
		top_[size_++] = static_cast<uint8_t>(code);
	}
	void db(const uint8_t *code, size_t codeSize)
	{
		for (size_t i = 0; i < codeSize; i++) db(code[i]);
	}
	// Append the low codeSize bytes of code in little-endian order.
	void db(uint64_t code, size_t codeSize)
	{
		if (codeSize > 8) XBYAK_THROW(ERR_BAD_PARAMETER)
		for (size_t i = 0; i < codeSize; i++) db(static_cast<uint8_t>(code >> (i * 8)));
	}
	void dw(uint32_t code) { db(code, 2); }
	void dd(uint32_t code) { db(code, 4); }
	void dq(uint64_t code) { db(code, 8); }
	const uint8_t *getCode() const { return top_; }
	template<class F>
	const F getCode() const { return reinterpret_cast<F>(top_); }
	const uint8_t *getCurr() const { return &top_[size_]; }
	template<class F>
	const F getCurr() const { return reinterpret_cast<F>(&top_[size_]); }
	size_t getSize() const { return size_; }
	void setSize(size_t size)
	{
		if (size > maxSize_) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG)
		size_ = size;
	}
	// Print up to the first 64 bytes of the generated code as hex, 16 bytes per line.
	void dump() const
	{
		const uint8_t *p = getCode();
		size_t bufSize = getSize();
		size_t remain = bufSize;
		for (int i = 0; i < 4; i++) {
			size_t disp = 16;
			if (remain < 16) {
				disp = remain;
			}
			for (size_t j = 0; j < 16; j++) {
				if (j < disp) {
					printf("%02X", p[i * 16 + j]);
				}
			}
			putchar('\n');
			remain -= disp;
			if (remain == 0) {
				break;
			}
		}
	}
	/*
		@param offset [in] offset from top
		@param disp [in] offset from the next of jmp
		@param size [in] write size(1, 2, 4, 8)
	*/
	void rewrite(size_t offset, uint64_t disp, size_t size)
	{
		assert(offset < maxSize_);
		if (size != 1 && size != 2 && size != 4 && size != 8) XBYAK_THROW(ERR_BAD_PARAMETER)
		uint8_t *const data = top_ + offset;
		for (size_t i = 0; i < size; i++) {
			data[i] = static_cast<uint8_t>(disp >> (i * 8));
		}
	}
	// Queue a fixup to be applied later by calcJmpAddress() (AUTO_GROW mode).
	void save(size_t offset, size_t val, int size, inner::LabelMode mode)
	{
		addrInfoList_.push_back(AddrInfo(offset, val, size, mode));
	}
	bool isAutoGrow() const { return type_ == AUTO_GROW; }
	bool isCalledCalcJmpAddress() const { return isCalledCalcJmpAddress_; }
	/**
		change exec permission of memory
		@param addr [in] buffer address
		@param size [in] buffer size
		@param protectMode [in] mode(RW/RWE/RE)
		@return true(success), false(failure)
	*/
	static inline bool protect(const void *addr, size_t size, int protectMode)
	{
#if defined(_WIN32)
		const DWORD c_rw = PAGE_READWRITE;
		const DWORD c_rwe = PAGE_EXECUTE_READWRITE;
		const DWORD c_re = PAGE_EXECUTE_READ;
		DWORD mode;
#else
		const int c_rw = PROT_READ | PROT_WRITE;
		const int c_rwe = PROT_READ | PROT_WRITE | PROT_EXEC;
		const int c_re = PROT_READ | PROT_EXEC;
		int mode;
#endif
		switch (protectMode) {
		case PROTECT_RW: mode = c_rw; break;
		case PROTECT_RWE: mode = c_rwe; break;
		case PROTECT_RE: mode = c_re; break;
		default:
			return false;
		}
#if defined(_WIN32)
		DWORD oldProtect;
		return VirtualProtect(const_cast<void*>(addr), size, mode, &oldProtect) != 0;
#elif defined(__GNUC__)
		size_t pageSize = sysconf(_SC_PAGESIZE);
		size_t iaddr = reinterpret_cast<size_t>(addr);
		size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1)); // mprotect needs a page-aligned address
#ifndef NDEBUG
		if (pageSize != 4096) fprintf(stderr, "large page(%zd) is used. not tested enough.\n", pageSize);
#endif
		return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
#else
		return true;
#endif
	}
	/**
		get aligned memory pointer
		@param addr [in] address
		@param alignedSize [in] power of two
		@return aligned addr by alingedSize
	*/
	static inline uint8_t *getAlignedAddress(uint8_t *addr, size_t alignedSize = 16)
	{
		return reinterpret_cast<uint8_t*>((reinterpret_cast<size_t>(addr) + alignedSize - 1) & ~(alignedSize - static_cast<size_t>(1)));
	}
};
1198
// Memory operand: wraps a RegExp, or a RIP-relative / 64-bit displacement form.
class Address : public Operand {
public:
	enum Mode {
		M_ModRM, // ordinary [base + index*scale + disp]
		M_64bitDisp, // moffset form with 64-bit displacement
		M_rip, // RIP-relative
		M_ripAddr // RIP-relative to an absolute address
	};
	XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegExp& e)
		: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), broadcast_(broadcast)
	{
		e_.verify(); // reject invalid base/index combinations up front
	}
#ifdef XBYAK64
	explicit XBYAK_CONSTEXPR Address(size_t disp)
		: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), broadcast_(false){ }
	XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegRip& addr)
		: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), broadcast_(broadcast) { }
#endif
	// The underlying address expression, optimized by default.
	RegExp getRegExp(bool optimize = true) const
	{
		return optimize ? e_.optimize() : e_;
	}
	Mode getMode() const { return mode_; }
	bool is32bit() const { return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; }
	bool isOnlyDisp() const { return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax
	size_t getDisp() const { return e_.getDisp(); }
	// REX bits contributed by base/index; 0 for non-ModRM modes.
	uint8_t getRex() const
	{
		if (mode_ != M_ModRM) return 0;
		return getRegExp().getRex();
	}
	bool is64bitDisp() const { return mode_ == M_64bitDisp; } // for moffset
	bool isBroadcast() const { return broadcast_; }
	const Label* getLabel() const { return label_; }
	bool operator==(const Address& rhs) const
	{
		return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && broadcast_ == rhs.broadcast_;
	}
	bool operator!=(const Address& rhs) const { return !operator==(rhs); }
	bool isVsib() const { return e_.isVsib(); }
private:
	RegExp e_;
	const Label* label_; // label for RIP-relative addressing (0 if none)
	Mode mode_;
	bool broadcast_; // EVEX broadcast ({1toN}) flag
};
1246
getAddress()1247 inline const Address& Operand::getAddress() const
1248 {
1249 assert(isMEM());
1250 return static_cast<const Address&>(*this);
1251 }
1252
1253 inline bool Operand::operator==(const Operand& rhs) const
1254 {
1255 if (isMEM() && rhs.isMEM()) return this->getAddress() == rhs.getAddress();
1256 return isEqualIfNotInherited(rhs);
1257 }
1258
// Factory for memory operands: operator[] builds an Address carrying this
// frame's access size (bit_) and broadcast flag.
class AddressFrame {
	void operator=(const AddressFrame&); // non-assignable
	AddressFrame(const AddressFrame&); // non-copyable
public:
	const uint32_t bit_; // operand size in bits passed to the Address
	const bool broadcast_;
	explicit XBYAK_CONSTEXPR AddressFrame(uint32_t bit, bool broadcast = false) : bit_(bit), broadcast_(broadcast) { }
	Address operator[](const RegExp& e) const
	{
		return Address(bit_, broadcast_, e);
	}
	// Absolute address given as a pointer.
	Address operator[](const void *disp) const
	{
		return Address(bit_, broadcast_, RegExp(reinterpret_cast<size_t>(disp)));
	}
#ifdef XBYAK64
	Address operator[](uint64_t disp) const { return Address(disp); } // 64-bit moffset form
	Address operator[](const RegRip& addr) const { return Address(bit_, broadcast_, addr); }
#endif
};
1279
// Record of a jump/reference whose target label was not defined at emit time;
// kept in an undef-list until the label is defined.
struct JmpLabel {
	size_t endOfJmp; /* offset from top to the end address of jmp */
	int jmpSize; // size of the displacement field in bytes
	inner::LabelMode mode; // how the stored value relates to the buffer top
	size_t disp; // disp for [rip + disp]
	explicit JmpLabel(size_t endOfJmp = 0, int jmpSize = 0, inner::LabelMode mode = inner::LasIs, size_t disp = 0)
		: endOfJmp(endOfJmp), jmpSize(jmpSize), mode(mode), disp(disp)
	{
	}
};
1290
1291 class LabelManager;
1292
// Handle to a position in the generated code, reference-counted by a
// LabelManager (id == 0 means "not yet used").
class Label {
	mutable LabelManager *mgr;
	mutable int id;
	friend class LabelManager;
public:
	Label() : mgr(0), id(0) {}
	Label(const Label& rhs);
	Label& operator=(const Label& rhs);
	~Label();
	// Detach from the manager without touching its ref counts
	// (called by LabelManager::resetLabelPtrList).
	void clear() { mgr = 0; id = 0; }
	int getId() const { return id; }
	const uint8_t *getAddress() const; // 0 while the code is not ready

	// backward compatibility
	// Legacy string form of a numeric label: ".%08x".
	static inline std::string toStr(int num)
	{
		char buf[16];
#if defined(_MSC_VER) && (_MSC_VER < 1900)
		_snprintf_s
#else
		snprintf
#endif
		(buf, sizeof(buf), ".%08x", num);
		return buf;
	}
};
1319
// Tracks label definitions and forward references for both string labels
// ("@@", ".local", "name") and Label objects, patching jump displacements
// in the owning CodeArray once targets become known.
class LabelManager {
	// for string label
	struct SlabelVal {
		size_t offset;
		SlabelVal(size_t offset) : offset(offset) {}
	};
	typedef XBYAK_STD_UNORDERED_MAP<std::string, SlabelVal> SlabelDefList;
	typedef XBYAK_STD_UNORDERED_MULTIMAP<std::string, const JmpLabel> SlabelUndefList;
	struct SlabelState {
		SlabelDefList defList;
		SlabelUndefList undefList;
	};
	typedef std::list<SlabelState> StateList;
	// for Label class
	struct ClabelVal {
		ClabelVal(size_t offset = 0) : offset(offset), refCount(1) {}
		size_t offset;
		int refCount; // number of Label objects sharing this id
	};
	typedef XBYAK_STD_UNORDERED_MAP<int, ClabelVal> ClabelDefList;
	typedef XBYAK_STD_UNORDERED_MULTIMAP<int, const JmpLabel> ClabelUndefList;
	typedef XBYAK_STD_UNORDERED_SET<Label*> LabelPtrList;

	CodeArray *base_;
	// global : stateList_.front(), local : stateList_.back()
	StateList stateList_;
	mutable int labelId_; // next id handed out by getId()
	ClabelDefList clabelDefList_;
	ClabelUndefList clabelUndefList_;
	LabelPtrList labelPtrList_; // all Label objects attached to this manager

	// Assign a fresh id to the label on first use.
	int getId(const Label& label) const
	{
		if (label.id == 0) label.id = labelId_++;
		return label.id;
	}
	// Record a label definition at addrOffset and resolve every pending
	// reference to it found in undefList.
	template<class DefList, class UndefList, class T>
	void define_inner(DefList& defList, UndefList& undefList, const T& labelId, size_t addrOffset)
	{
		// add label
		typename DefList::value_type item(labelId, addrOffset);
		std::pair<typename DefList::iterator, bool> ret = defList.insert(item);
		if (!ret.second) XBYAK_THROW(ERR_LABEL_IS_REDEFINED)
		// search undefined label
		for (;;) {
			typename UndefList::iterator itr = undefList.find(labelId);
			if (itr == undefList.end()) break;
			const JmpLabel *jmp = &itr->second;
			const size_t offset = jmp->endOfJmp - jmp->jmpSize; // where the displacement field starts
			size_t disp;
			if (jmp->mode == inner::LaddTop) {
				disp = addrOffset;
			} else if (jmp->mode == inner::Labs) {
				disp = size_t(base_->getCurr());
			} else {
				disp = addrOffset - jmp->endOfJmp + jmp->disp; // relative to end of jmp
#ifdef XBYAK64
				if (jmp->jmpSize <= 4 && !inner::IsInInt32(disp)) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG)
#endif
				if (jmp->jmpSize == 1 && !inner::IsInDisp8((uint32_t)disp)) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR)
			}
			if (base_->isAutoGrow()) {
				base_->save(offset, disp, jmp->jmpSize, jmp->mode); // buffer may move; defer
			} else {
				base_->rewrite(offset, disp, jmp->jmpSize);
			}
			undefList.erase(itr);
		}
	}
	// Look up label's offset in defList; false when not defined yet.
	template<class DefList, class T>
	bool getOffset_inner(const DefList& defList, size_t *offset, const T& label) const
	{
		typename DefList::const_iterator i = defList.find(label);
		if (i == defList.end()) return false;
		*offset = i->second.offset;
		return true;
	}
	friend class Label;
	void incRefCount(int id, Label *label)
	{
		clabelDefList_[id].refCount++;
		labelPtrList_.insert(label);
	}
	void decRefCount(int id, Label *label)
	{
		labelPtrList_.erase(label);
		ClabelDefList::iterator i = clabelDefList_.find(id);
		if (i == clabelDefList_.end()) return;
		if (i->second.refCount == 1) {
			clabelDefList_.erase(id); // last reference: drop the definition
		} else {
			--i->second.refCount;
		}
	}
	template<class T>
	bool hasUndefinedLabel_inner(const T& list) const
	{
#ifndef NDEBUG
		for (typename T::const_iterator i = list.begin(); i != list.end(); ++i) {
			std::cerr << "undefined label:" << i->first << std::endl;
		}
#endif
		return !list.empty();
	}
	// detach all labels linked to LabelManager
	void resetLabelPtrList()
	{
		for (LabelPtrList::iterator i = labelPtrList_.begin(), ie = labelPtrList_.end(); i != ie; ++i) {
			(*i)->clear();
		}
		labelPtrList_.clear();
	}
public:
	LabelManager()
	{
		reset();
	}
	~LabelManager()
	{
		resetLabelPtrList();
	}
	void reset()
	{
		base_ = 0;
		labelId_ = 1;
		stateList_.clear();
		// two base states: front() = global labels, back() = current local scope
		stateList_.push_back(SlabelState());
		stateList_.push_back(SlabelState());
		clabelDefList_.clear();
		clabelUndefList_.clear();
		resetLabelPtrList();
		ClearError();
	}
	// Open a new local-label scope.
	void enterLocal()
	{
		stateList_.push_back(SlabelState());
	}
	// Close the current local-label scope; every local label must be defined.
	void leaveLocal()
	{
		if (stateList_.size() <= 2) XBYAK_THROW(ERR_UNDER_LOCAL_LABEL)
		if (hasUndefinedLabel_inner(stateList_.back().undefList)) XBYAK_THROW(ERR_LABEL_IS_NOT_FOUND)
		stateList_.pop_back();
	}
	void set(CodeArray *base) { base_ = base; }
	// Define a string label at the current code position.
	// "@@" alternates between the global "@b" (backward) and "@f" (forward) slots.
	void defineSlabel(std::string label)
	{
		if (label == "@b" || label == "@f") XBYAK_THROW(ERR_BAD_LABEL_STR)
		if (label == "@@") {
			SlabelDefList& defList = stateList_.front().defList;
			SlabelDefList::iterator i = defList.find("@f");
			if (i != defList.end()) {
				defList.erase(i);
				label = "@b";
			} else {
				i = defList.find("@b");
				if (i != defList.end()) {
					defList.erase(i);
				}
				label = "@f";
			}
		}
		// labels starting with '.' are local to the innermost scope
		SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
		define_inner(st.defList, st.undefList, label, base_->getSize());
	}
	// Define a Label object at the current code position.
	void defineClabel(Label& label)
	{
		define_inner(clabelDefList_, clabelUndefList_, getId(label), base_->getSize());
		label.mgr = this;
		labelPtrList_.insert(&label);
	}
	// Make dst refer to the same, already defined, position as src.
	void assign(Label& dst, const Label& src)
	{
		ClabelDefList::const_iterator i = clabelDefList_.find(src.id);
		if (i == clabelDefList_.end()) XBYAK_THROW(ERR_LABEL_ISNOT_SET_BY_L)
		define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset);
		dst.mgr = this;
		labelPtrList_.insert(&dst);
	}
	// Resolve a string label to its offset; may rewrite "@b"/"@f" to the
	// currently active slot. Returns false when undefined.
	bool getOffset(size_t *offset, std::string& label) const
	{
		const SlabelDefList& defList = stateList_.front().defList;
		if (label == "@b") {
			if (defList.find("@f") != defList.end()) {
				label = "@f";
			} else if (defList.find("@b") == defList.end()) {
				XBYAK_THROW_RET(ERR_LABEL_IS_NOT_FOUND, false)
			}
		} else if (label == "@f") {
			if (defList.find("@f") != defList.end()) {
				label = "@b";
			}
		}
		const SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
		return getOffset_inner(st.defList, offset, label);
	}
	bool getOffset(size_t *offset, const Label& label) const
	{
		return getOffset_inner(clabelDefList_, offset, getId(label));
	}
	// Remember a reference to a string label that is not defined yet.
	void addUndefinedLabel(const std::string& label, const JmpLabel& jmp)
	{
		SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
		st.undefList.insert(SlabelUndefList::value_type(label, jmp));
	}
	// Remember a reference to a Label object that is not defined yet.
	void addUndefinedLabel(const Label& label, const JmpLabel& jmp)
	{
		clabelUndefList_.insert(ClabelUndefList::value_type(label.id, jmp));
	}
	bool hasUndefSlabel() const
	{
		for (StateList::const_iterator i = stateList_.begin(), ie = stateList_.end(); i != ie; ++i) {
			if (hasUndefinedLabel_inner(i->undefList)) return true;
		}
		return false;
	}
	bool hasUndefClabel() const { return hasUndefinedLabel_inner(clabelUndefList_); }
	const uint8_t *getCode() const { return base_->getCode(); }
	// True once label addresses are final (non-AutoGrow, or fixups applied).
	bool isReady() const { return !base_->isAutoGrow() || base_->isCalledCalcJmpAddress(); }
};
1539
// Copy constructor: shares the source's id and registers this copy with the manager.
inline Label::Label(const Label& rhs)
	: mgr(rhs.mgr)
	, id(rhs.id)
{
	if (mgr) mgr->incRefCount(id, this);
}
1546 inline Label& Label::operator=(const Label& rhs)
1547 {
1548 if (id) XBYAK_THROW_RET(ERR_LABEL_IS_ALREADY_SET_BY_L, *this)
1549 id = rhs.id;
1550 mgr = rhs.mgr;
1551 if (mgr) mgr->incRefCount(id, this);
1552 return *this;
1553 }
~Label()1554 inline Label::~Label()
1555 {
1556 if (id && mgr) mgr->decRefCount(id, this);
1557 }
getAddress()1558 inline const uint8_t* Label::getAddress() const
1559 {
1560 if (mgr == 0 || !mgr->isReady()) return 0;
1561 size_t offset;
1562 if (!mgr->getOffset(&offset, *this)) return 0;
1563 return mgr->getCode() + offset;
1564 }
1565
// Which encoding to prefer when an instruction can be encoded more than one way.
typedef enum {
	DefaultEncoding,
	VexEncoding,
	EvexEncoding
} PreferredEncoding;
1571
// Main user-facing class: one mnemonic-named method per instruction,
// emitting bytes into the inherited CodeArray buffer.
class CodeGenerator : public CodeArray {
public:
	enum LabelType {
		T_SHORT, // 8-bit relative jump
		T_NEAR, // 32-bit relative jump
		T_FAR, // far jump
		T_AUTO // T_SHORT if possible
	};
private:
	CodeGenerator operator=(const CodeGenerator&); // don't call
#ifdef XBYAK64
	enum { i32e = 32 | 64, BIT = 64 }; // i32e: register sizes accepted where 32-bit GPRs are expected
	static const uint64_t dummyAddr = uint64_t(0x1122334455667788ull);
	typedef Reg64 NativeReg;
#else
	enum { i32e = 32, BIT = 32 };
	static const size_t dummyAddr = 0x12345678;
	typedef Reg32 NativeReg;
#endif
1591 // (XMM, XMM|MEM)
isXMM_XMMorMEM(const Operand & op1,const Operand & op2)1592 static inline bool isXMM_XMMorMEM(const Operand& op1, const Operand& op2)
1593 {
1594 return op1.isXMM() && (op2.isXMM() || op2.isMEM());
1595 }
1596 // (MMX, MMX|MEM) or (XMM, XMM|MEM)
isXMMorMMX_MEM(const Operand & op1,const Operand & op2)1597 static inline bool isXMMorMMX_MEM(const Operand& op1, const Operand& op2)
1598 {
1599 return (op1.isMMX() && (op2.isMMX() || op2.isMEM())) || isXMM_XMMorMEM(op1, op2);
1600 }
1601 // (XMM, MMX|MEM)
isXMM_MMXorMEM(const Operand & op1,const Operand & op2)1602 static inline bool isXMM_MMXorMEM(const Operand& op1, const Operand& op2)
1603 {
1604 return op1.isXMM() && (op2.isMMX() || op2.isMEM());
1605 }
1606 // (MMX, XMM|MEM)
isMMX_XMMorMEM(const Operand & op1,const Operand & op2)1607 static inline bool isMMX_XMMorMEM(const Operand& op1, const Operand& op2)
1608 {
1609 return op1.isMMX() && (op2.isXMM() || op2.isMEM());
1610 }
1611 // (XMM, REG32|MEM)
isXMM_REG32orMEM(const Operand & op1,const Operand & op2)1612 static inline bool isXMM_REG32orMEM(const Operand& op1, const Operand& op2)
1613 {
1614 return op1.isXMM() && (op2.isREG(i32e) || op2.isMEM());
1615 }
1616 // (REG32, XMM|MEM)
isREG32_XMMorMEM(const Operand & op1,const Operand & op2)1617 static inline bool isREG32_XMMorMEM(const Operand& op1, const Operand& op2)
1618 {
1619 return op1.isREG(i32e) && (op2.isXMM() || op2.isMEM());
1620 }
1621 // (REG32, REG32|MEM)
isREG32_REG32orMEM(const Operand & op1,const Operand & op2)1622 static inline bool isREG32_REG32orMEM(const Operand& op1, const Operand& op2)
1623 {
1624 return op1.isREG(i32e) && ((op2.isREG(i32e) && op1.getBit() == op2.getBit()) || op2.isMEM());
1625 }
	// Emit address-size (0x67), operand-size (0x66) and REX prefixes for a
	// two-operand instruction. Operands are examined so that the memory
	// operand, if any, ends up in p2; two memory operands are rejected.
	void rex(const Operand& op1, const Operand& op2 = Operand())
	{
		uint8_t rex = 0;
		const Operand *p1 = &op1, *p2 = &op2;
		if (p1->isMEM()) std::swap(p1, p2);
		if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) // mem, mem is impossible
		if (p2->isMEM()) {
			const Address& addr = p2->getAddress();
			if (BIT == 64 && addr.is32bit()) db(0x67); // 32-bit addressing in 64-bit mode
			rex = addr.getRex() | p1->getReg().getRex();
		} else {
			// ModRM(reg, base);
			rex = op2.getReg().getRex(op1.getReg());
		}
		// except movsx(16bit, 32/64bit)
		if ((op1.isBit(16) && !op2.isBit(i32e)) || (op2.isBit(16) && !op1.isBit(i32e))) db(0x66);
		if (rex) db(rex);
	}
	// Encoding attribute flags for SSE/AVX/AVX-512 instructions.
	// The low 3 bits select a memory element size N (T_N1..T_N32) used for
	// EVEX disp8*N compression; the remaining bits are independent flags.
	enum AVXtype {
		// low 3 bit
		T_N1 = 1,
		T_N2 = 2,
		T_N4 = 3,
		T_N8 = 4,
		T_N16 = 5,
		T_N32 = 6,
		T_NX_MASK = 7,
		//
		T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
		T_DUP = 1 << 4, // N = (8, 32, 64)
		T_66 = 1 << 5, // pp = 1
		T_F3 = 1 << 6, // pp = 2
		T_F2 = T_66 | T_F3, // pp = 3
		T_ER_R = 1 << 7, // reg{er}
		T_0F = 1 << 8,
		T_0F38 = 1 << 9,
		T_0F3A = 1 << 10,
		T_L0 = 1 << 11,
		T_L1 = 1 << 12,
		T_W0 = 1 << 13,
		T_W1 = 1 << 14,
		T_EW0 = 1 << 15,
		T_EW1 = 1 << 16,
		T_YMM = 1 << 17, // support YMM, ZMM
		T_EVEX = 1 << 18,
		T_ER_X = 1 << 19, // xmm{er}
		T_ER_Y = 1 << 20, // ymm{er}
		T_ER_Z = 1 << 21, // zmm{er}
		T_SAE_X = 1 << 22, // xmm{sae}
		T_SAE_Y = 1 << 23, // ymm{sae}
		T_SAE_Z = 1 << 24, // zmm{sae}
		T_MUST_EVEX = 1 << 25, // contains T_EVEX
		T_B32 = 1 << 26, // m32bcst
		T_B64 = 1 << 27, // m64bcst
		T_B16 = T_B32 | T_B64, // m16bcst (Be careful)
		T_M_K = 1 << 28, // mem{k}
		T_VSIB = 1 << 29,
		T_MEM_EVEX = 1 << 30, // use evex if mem
		T_FP16 = 1 << 31, // avx512-fp16
		T_MAP5 = T_FP16 | T_0F,
		T_MAP6 = T_FP16 | T_0F38,
		T_XXX
	};
	// T_66 = 1, T_F3 = 2, T_F2 = 3
	// Extract the 2-bit "pp" (mandatory prefix) field from an AVXtype value.
	uint32_t getPP(int type) const { return (type >> 5) & 3; }
	// Emit a 2- or 3-byte VEX prefix followed by the opcode byte.
	// The compact 2-byte form (0xC5) is used when X, B and W are clear and
	// the opcode map is 0F.
	void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
	{
		int w = (type & T_W1) ? 1 : 0;
		bool is256 = (type & T_L1) ? true : (type & T_L0) ? false : reg.isYMM();
		bool r = reg.isExtIdx();
		bool b = base.isExtIdx();
		int idx = v ? v->getIdx() : 0;
		if ((idx | reg.getIdx() | base.getIdx()) >= 16) XBYAK_THROW(ERR_BAD_COMBINATION) // registers 16-31 need EVEX
		uint32_t pp = getPP(type);
		uint32_t vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp; // vvvv is stored inverted
		if (!b && !x && !w && (type & T_0F)) {
			db(0xC5); db((r ? 0 : 0x80) | vvvv); // 2-byte form
		} else {
			uint32_t mmmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
			db(0xC4); db((r ? 0 : 0x80) | (x ? 0 : 0x40) | (b ? 0 : 0x20) | mmmm); db((w << 7) | vvvv); // 3-byte form
		}
		db(code);
	}
verifySAE(const Reg & r,int type)1709 void verifySAE(const Reg& r, int type) const
1710 {
1711 if (((type & T_SAE_X) && r.isXMM()) || ((type & T_SAE_Y) && r.isYMM()) || ((type & T_SAE_Z) && r.isZMM())) return;
1712 XBYAK_THROW(ERR_SAE_IS_INVALID)
1713 }
verifyER(const Reg & r,int type)1714 void verifyER(const Reg& r, int type) const
1715 {
1716 if ((type & T_ER_R) && r.isREG(32|64)) return;
1717 if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return;
1718 XBYAK_THROW(ERR_ER_IS_INVALID)
1719 }
	// (a, b, c) contains non zero two or three values then err
	// Returns the merged value a|b|c; throws err (returning 0) when more
	// than one distinct non-zero value was supplied.
	int verifyDuplicate(int a, int b, int c, int err)
	{
		int v = a | b | c;
		if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) XBYAK_THROW_RET(err, 0)
		return v;
	}
	// Emit the 4-byte EVEX prefix and the opcode byte.
	// Returns disp8N, the scale factor applied to compressed 8-bit
	// displacements of the memory operand.
	int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32_t VL = 0, bool Hi16Vidx = false)
	{
		if (!(type & (T_EVEX | T_MUST_EVEX))) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, 0)
		int w = (type & T_EW1) ? 1 : 0;
		uint32_t mmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
		if (type & T_FP16) mmm |= 4; // map5/map6 for avx512-fp16
		uint32_t pp = getPP(type);
		int idx = v ? v->getIdx() : 0;
		uint32_t vvvv = ~idx; // stored inverted

		bool R = !reg.isExtIdx();
		bool X = x ? false : !base.isExtIdx2();
		bool B = !base.isExtIdx();
		bool Rp = !reg.isExtIdx2();
		int LL;
		int rounding = verifyDuplicate(reg.getRounding(), base.getRounding(), v ? v->getRounding() : 0, ERR_ROUNDING_IS_ALREADY_SET);
		int disp8N = 1;
		if (rounding) {
			// {sae}/{er}: LL field is reused for the rounding mode
			if (rounding == EvexModifierRounding::T_SAE) {
				verifySAE(base, type); LL = 0;
			} else {
				verifyER(base, type); LL = rounding - 1;
			}
			b = true;
		} else {
			// derive vector length from the widest participating register
			if (v) VL = (std::max)(VL, v->getBit());
			VL = (std::max)((std::max)(reg.getBit(), base.getBit()), VL);
			LL = (VL == 512) ? 2 : (VL == 256) ? 1 : 0;
			if (b) {
				disp8N = ((type & T_B16) == T_B16) ? 2 : (type & T_B32) ? 4 : 8; // broadcast element size
			} else if (type & T_DUP) {
				disp8N = VL == 128 ? 8 : VL == 256 ? 32 : 64;
			} else {
				if ((type & (T_NX_MASK | T_N_VL)) == 0) {
					type |= T_N16 | T_N_VL; // default
				}
				int low = type & T_NX_MASK;
				if (low > 0) {
					disp8N = 1 << (low - 1);
					if (type & T_N_VL) disp8N *= (VL == 512 ? 4 : VL == 256 ? 2 : 1);
				}
			}
		}
		bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx);
		bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false);
		if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
		if (aaa == 0) z = 0; // clear T_z if mask is not set
		db(0x62); // EVEX escape byte
		db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | mmm);
		db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3));
		db((z ? 0x80 : 0) | ((LL & 3) << 5) | (b ? 0x10 : 0) | (Vp ? 8 : 0) | (aaa & 7));
		db(code);
		return disp8N;
	}
setModRM(int mod,int r1,int r2)1781 void setModRM(int mod, int r1, int r2)
1782 {
1783 db(static_cast<uint8_t>((mod << 6) | ((r1 & 7) << 3) | (r2 & 7)));
1784 }
	// Emit ModRM (and SIB when required) plus displacement bytes for the
	// memory operand e; reg goes into the ModRM reg field. When disp8N != 0
	// (EVEX), a displacement that is a multiple of disp8N is compressed to
	// disp/disp8N and stored as a single byte.
	void setSIB(const RegExp& e, int reg, int disp8N = 0)
	{
		uint64_t disp64 = e.getDisp();
#ifdef XBYAK64
		uint64_t high = disp64 >> 32;
		if (high != 0 && high != 0xFFFFFFFF) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG) // must fit in a sign-extended 32-bit value
#endif
		uint32_t disp = static_cast<uint32_t>(disp64);
		const Reg& base = e.getBase();
		const Reg& index = e.getIndex();
		const int baseIdx = base.getIdx();
		const int baseBit = base.getBit();
		const int indexBit = index.getBit();
		enum {
			mod00 = 0, mod01 = 1, mod10 = 2
		};
		int mod = mod10; // disp32
		if (!baseBit || ((baseIdx & 7) != Operand::EBP && disp == 0)) {
			mod = mod00; // no displacement byte ([EBP] always needs one)
		} else {
			if (disp8N == 0) {
				if (inner::IsInDisp8(disp)) {
					mod = mod01; // disp8
				}
			} else {
				// disp must be casted to signed
				uint32_t t = static_cast<uint32_t>(static_cast<int>(disp) / disp8N);
				if ((disp % disp8N) == 0 && inner::IsInDisp8(t)) {
					disp = t; // store the compressed displacement
					mod = mod01;
				}
			}
		}
		const int newBaseIdx = baseBit ? (baseIdx & 7) : Operand::EBP;
		/* ModR/M = [2:3:3] = [Mod:reg/code:R/M] */
		bool hasSIB = indexBit || (baseIdx & 7) == Operand::ESP;
#ifdef XBYAK64
		if (!baseBit && !indexBit) hasSIB = true; // [disp] without base/index is encoded via SIB in 64-bit mode
#endif
		if (hasSIB) {
			setModRM(mod, reg, Operand::ESP); // rm = ESP signals a following SIB byte
			/* SIB = [2:3:3] = [SS:index:base(=rm)] */
			const int idx = indexBit ? (index.getIdx() & 7) : Operand::ESP;
			const int scale = e.getScale();
			const int SS = (scale == 8) ? 3 : (scale == 4) ? 2 : (scale == 2) ? 1 : 0;
			setModRM(SS, idx, newBaseIdx);
		} else {
			setModRM(mod, reg, newBaseIdx);
		}
		if (mod == mod01) {
			db(disp); // 1 byte
		} else if (mod == mod10 || (mod == mod00 && !baseBit)) {
			dd(disp); // 4 bytes
		}
	}
	LabelManager labelMgr_; // tracks defined/undefined labels of this generator
	// true if x, viewed as a signed 32-bit value, fits in a signed 16-bit range
	bool isInDisp16(uint32_t x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; }
	// reg-reg form: REX + opcode (bit0 set unless 8-bit operand) + ModRM with mod=3
	void opModR(const Reg& reg1, const Reg& reg2, int code0, int code1 = NONE, int code2 = NONE)
	{
		rex(reg2, reg1);
		db(code0 | (reg1.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
		setModRM(3, reg1.getIdx(), reg2.getIdx());
	}
	// reg-mem form: prefixes/REX + opcode (bit0 set unless 8-bit) + ModRM/SIB/disp.
	// immSize : byte count of a trailing immediate, needed for correct RIP-relative math.
	void opModM(const Address& addr, const Reg& reg, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0)
	{
		if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
		rex(addr, reg);
		db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
		opAddr(addr, reg.getIdx(), immSize);
	}
	// load-segment style ops (lss/lfs/lgs): like opModM but opcode is emitted
	// verbatim (no width bit) and 8-bit destination registers are rejected.
	void opLoadSeg(const Address& addr, const Reg& reg, int code0, int code1 = NONE)
	{
		if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
		if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
		rex(addr, reg);
		db(code0); if (code1 != NONE) db(code1);
		opAddr(addr, reg.getIdx());
	}
	// MPX-style MIB addressing (bndldx/bndstx): requires a plain ModRM address
	// and always encodes through setSIB, bypassing opAddr.
	void opMIB(const Address& addr, const Reg& reg, int code0, int code1)
	{
		if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
		if (addr.getMode() != Address::M_ModRM) XBYAK_THROW(ERR_INVALID_MIB_ADDRESS)
		if (BIT == 64 && addr.is32bit()) db(0x67); // address-size override
		const RegExp& regExp = addr.getRegExp(false); // note: local `rex` shadows the member function
		uint8_t rex = regExp.getRex();
		if (rex) db(rex);
		db(code0); db(code1);
		setSIB(regExp, reg.getIdx());
	}
	// Emit a jump to a resolved target. disp is relative to the *start* of this
	// instruction, so the instruction's own length is subtracted before encoding.
	// Uses the short (rel8) form when it fits unless T_NEAR is requested.
	void makeJmp(uint32_t disp, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref)
	{
		const int shortJmpSize = 2;
		const int longHeaderSize = longPref ? 2 : 1; // e.g. 0x0F prefix for jcc near
		const int longJmpSize = longHeaderSize + 4;
		if (type != T_NEAR && inner::IsInDisp8(disp - shortJmpSize)) {
			db(shortCode); db(disp - shortJmpSize);
		} else {
			if (type == T_SHORT) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR)
			if (longPref) db(longPref);
			db(longCode); dd(disp - longJmpSize);
		}
	}
	// whether this label type should be treated as a near jump
	bool isNEAR(LabelType type) const { return type == T_NEAR || (type == T_AUTO && isDefaultJmpNEAR_); }
	// Emit jmp/jcc to a label (T = std::string or Label). If the label is already
	// defined, encode the final displacement now; otherwise emit a zero-filled
	// placeholder and register an undefined-label fixup.
	template<class T>
	void opJmp(T& label, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref)
	{
		if (type == T_FAR) XBYAK_THROW(ERR_NOT_SUPPORTED)
		if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); /* avoid splitting code of jmp */
		size_t offset = 0;
		if (labelMgr_.getOffset(&offset, label)) { /* label exists */
			makeJmp(inner::VerifyInInt32(offset - size_), type, shortCode, longCode, longPref);
		} else {
			int jmpSize = 0;
			if (isNEAR(type)) {
				jmpSize = 4;
				if (longPref) db(longPref);
				db(longCode); dd(0); // rel32 placeholder
			} else {
				jmpSize = 1;
				db(shortCode); db(0); // rel8 placeholder
			}
			JmpLabel jmp(size_, jmpSize, inner::LasIs);
			labelMgr_.addUndefinedLabel(label, jmp);
		}
	}
	// Emit jmp/jcc to an absolute address. In auto-grow mode the buffer may move,
	// so the target is recorded via save() and patched at getCode() time; only the
	// near form is possible then. Otherwise encode a normal relative jump now.
	void opJmpAbs(const void *addr, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref = 0)
	{
		if (type == T_FAR) XBYAK_THROW(ERR_NOT_SUPPORTED)
		if (isAutoGrow()) {
			if (!isNEAR(type)) XBYAK_THROW(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW)
			if (size_ + 16 >= maxSize_) growMemory();
			if (longPref) db(longPref);
			db(longCode);
			dd(0); // placeholder, fixed up relative to the final top address
			save(size_ - 4, size_t(addr) - size_, 4, inner::Labs);
		} else {
			makeJmp(inner::VerifyInInt32(reinterpret_cast<const uint8_t*>(addr) - getCurr()), type, shortCode, longCode, longPref);
		}

	}
	// jmp/call through a register or memory operand (opcode 0xFF group).
	// T_FAR selects the m16:16/m16:32/m16:64 form, which uses ext+1 and
	// keeps REX.W (disableRex=false); the near form drops REX.W.
	void opJmpOp(const Operand& op, LabelType type, int ext)
	{
		const int bit = 16|i32e;
		if (type == T_FAR) {
			if (!op.isMEM(bit)) XBYAK_THROW(ERR_NOT_SUPPORTED)
			opR_ModM(op, bit, ext + 1, 0xFF, NONE, NONE, false);
		} else {
			opR_ModM(op, bit, ext, 0xFF, NONE, NONE, true);
		}
	}
	// Emit the addressing bytes (ModRM/SIB/disp) for addr.
	// reg is reg field of ModRM
	// immSize is the size for immediate value (affects RIP-relative displacement)
	// disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement
	// permitVisb : allow VSIB (gather/scatter) addressing
	void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0, bool permitVisb = false)
	{
		if (!permitVisb && addr.isVsib()) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
		if (addr.getMode() == Address::M_ModRM) {
			setSIB(addr.getRegExp(), reg, disp8N);
		} else if (addr.getMode() == Address::M_rip || addr.getMode() == Address::M_ripAddr) {
			setModRM(0, reg, 5); // mod=00, r/m=101 = RIP-relative disp32
			if (addr.getLabel()) { // [rip + Label]
				putL_inner(*addr.getLabel(), true, addr.getDisp() - immSize);
			} else {
				size_t disp = addr.getDisp();
				if (addr.getMode() == Address::M_ripAddr) {
					if (isAutoGrow()) XBYAK_THROW(ERR_INVALID_RIP_IN_AUTO_GROW)
					// rel32 counts from the end of the instruction (disp32 + trailing imm)
					disp -= (size_t)getCurr() + 4 + immSize;
				}
				dd(inner::VerifyInInt32(disp));
			}
		}
	}
	/* generic SSE-family encoder; preCode is for SSSE3/SSE4 (extra 0x38/0x3A map byte) */
	// isValid : optional operand-combination check; imm8 : trailing immediate or NONE.
	void opGen(const Operand& reg, const Operand& op, int code, int pref, bool isValid(const Operand&, const Operand&), int imm8 = NONE, int preCode = NONE)
	{
		if (isValid && !isValid(reg, op)) XBYAK_THROW(ERR_BAD_COMBINATION)
		if (pref != NONE) db(pref);
		if (op.isMEM()) {
			opModM(op.getAddress(), reg.getReg(), 0x0F, preCode, code, (imm8 != NONE) ? 1 : 0);
		} else {
			opModR(reg.getReg(), op.getReg(), 0x0F, preCode, code);
		}
		if (imm8 != NONE) db(imm8);
	}
	// MMX/SSE shift-by-immediate group: 0x66 prefix only for the XMM form
	void opMMX_IMM(const Mmx& mmx, int imm8, int code, int ext)
	{
		if (mmx.isXMM()) db(0x66);
		opModR(Reg32(ext), mmx, 0x0F, code);
		db(imm8);
	}
	// common MMX/SSE wrapper: prefix applies to the XMM form only
	void opMMX(const Mmx& mmx, const Operand& op, int code, int pref = 0x66, int imm8 = NONE, int preCode = NONE)
	{
		opGen(mmx, op, code, mmx.isXMM() ? pref : NONE, isXMMorMMX_MEM, imm8, preCode);
	}
	// movlps/movhps-style moves: (xmm, mem) uses code, (mem, xmm) uses code|1 (store form)
	void opMovXMM(const Operand& op1, const Operand& op2, int code, int pref)
	{
		if (pref != NONE) db(pref);
		if (op1.isXMM() && op2.isMEM()) {
			opModM(op2.getAddress(), op1.getReg(), 0x0F, code);
		} else if (op1.isMEM() && op2.isXMM()) {
			opModM(op1.getAddress(), op2.getReg(), 0x0F, code | 1);
		} else {
			XBYAK_THROW(ERR_BAD_COMBINATION)
		}
	}
	// pextrw/pextrb/pextrd family; hasMMX2 enables the legacy 0x0F 0xC5 reg form
	void opExt(const Operand& op, const Mmx& mmx, int code, int imm, bool hasMMX2 = false)
	{
		if (hasMMX2 && op.isREG(i32e)) { /* pextrw is special */
			if (mmx.isXMM()) db(0x66);
			opModR(op.getReg(), mmx, 0x0F, 0xC5); db(imm);
		} else {
			opGen(mmx, op, code, 0x66, isXMM_REG32orMEM, imm, 0x3A);
		}
	}
	// Encode op (reg or mem) against an opcode-extension `ext` in the reg field.
	// disableRex : drop REX.W for ops that are 64-bit by default (e.g. near jmp).
	void opR_ModM(const Operand& op, int bit, int ext, int code0, int code1 = NONE, int code2 = NONE, bool disableRex = false, int immSize = 0)
	{
		int opBit = op.getBit();
		if (disableRex && opBit == 64) opBit = 32;
		if (op.isREG(bit)) {
			opModR(Reg(ext, Operand::REG, opBit), op.getReg().changeBit(opBit), code0, code1, code2);
		} else if (op.isMEM()) {
			opModM(op.getAddress(), Reg(ext, Operand::REG, opBit), code0, code1, code2, immSize);
		} else {
			XBYAK_THROW(ERR_BAD_COMBINATION)
		}
	}
	// shl/shr/sal/sar/rol/ror by immediate: 0xD0/0xD1 for shift-by-1, 0xC0/0xC1 + imm8 otherwise
	void opShift(const Operand& op, int imm, int ext)
	{
		verifyMemHasSize(op);
		opR_ModM(op, 0, ext, (0xC0 | ((imm == 1 ? 1 : 0) << 4)), NONE, NONE, false, (imm != 1) ? 1 : 0);
		if (imm != 1) db(imm);
	}
	// shift by CL: only the cl register is valid as a count
	void opShift(const Operand& op, const Reg8& _cl, int ext)
	{
		if (_cl.getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION)
		opR_ModM(op, 0, ext, 0xD2);
	}
	// Dispatch to reg-reg (condR) or reg-mem (condM) encoding; error if neither holds.
	void opModRM(const Operand& op1, const Operand& op2, bool condR, bool condM, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0)
	{
		if (condR) {
			opModR(op1.getReg(), op2.getReg(), code0, code1, code2);
		} else if (condM) {
			opModM(op2.getAddress(), op1.getReg(), code0, code1, code2, immSize);
		} else {
			XBYAK_THROW(ERR_BAD_COMBINATION)
		}
	}
	// shld/shrd: count is either imm8 or cl (code|1 selects the cl form)
	void opShxd(const Operand& op, const Reg& reg, uint8_t imm, int code, const Reg8 *_cl = 0)
	{
		if (_cl && _cl->getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION)
		opModRM(reg, op, (op.isREG(16 | i32e) && op.getBit() == reg.getBit()), op.isMEM() && (reg.isREG(16 | i32e)), 0x0F, code | (_cl ? 1 : 0), NONE, _cl ? 0 : 1);
		if (!_cl) db(imm);
	}
	// (REG, REG|MEM), (MEM, REG)
	// direction bit (|2) selects the load form when op1 is a register and op2 memory
	void opRM_RM(const Operand& op1, const Operand& op2, int code)
	{
		if (op1.isREG() && op2.isMEM()) {
			opModM(op2.getAddress(), op1.getReg(), code | 2);
		} else {
			opModRM(op2, op1, op1.isREG() && op1.getKind() == op2.getKind(), op1.isMEM() && op2.isREG(), code);
		}
	}
	// (REG|MEM, IMM)
	// ALU group (add/or/adc/...): picks imm8 sign-extended form, the short
	// al/ax/eax/rax accumulator form, or the generic 0x80/0x81/0x83 form.
	void opRM_I(const Operand& op, uint32_t imm, int code, int ext)
	{
		verifyMemHasSize(op);
		uint32_t immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32;
		if (op.isBit(8)) immBit = 8;
		if (op.getBit() < immBit) XBYAK_THROW(ERR_IMM_IS_TOO_BIG)
		if (op.isBit(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */
		if (op.isREG() && op.getIdx() == 0 && (op.getBit() == immBit || (op.isBit(64) && immBit == 32))) { // rax, eax, ax, al
			rex(op);
			db(code | 4 | (immBit == 8 ? 0 : 1)); // short accumulator form
		} else {
			int tmp = immBit < (std::min)(op.getBit(), 32U) ? 2 : 0; // |2 = sign-extended imm8
			opR_ModM(op, 0, ext, 0x80 | tmp, NONE, NONE, false, immBit / 8);
		}
		db(imm, immBit / 8);
	}
	// inc/dec: short one-byte register form on 32-bit only (those opcodes are
	// REX prefixes in 64-bit mode); otherwise the 0xFE/0xFF group form.
	void opIncDec(const Operand& op, int code, int ext)
	{
		verifyMemHasSize(op);
#ifndef XBYAK64
		if (op.isREG() && !op.isBit(8)) {
			rex(op); db(code | op.getIdx());
			return;
		}
#endif
		code = 0xFE;
		if (op.isREG()) {
			opModR(Reg(ext, Operand::REG, op.getBit()), op.getReg(), code);
		} else {
			opModM(op.getAddress(), Reg(ext, Operand::REG, op.getBit()), code);
		}
	}
	// push/pop: alt is the one-byte register opcode (0x50/0x58); code/ext the
	// ModRM group form for memory operands. Only 16-bit and native-width ops.
	void opPushPop(const Operand& op, int code, int ext, int alt)
	{
		int bit = op.getBit();
		if (bit == 16 || bit == BIT) {
			if (bit == 16) db(0x66);
			if (op.isREG()) {
				if (op.getReg().getIdx() >= 8) db(0x41); // REX.B for r8-r15
				db(alt | (op.getIdx() & 7));
				return;
			}
			if (op.isMEM()) {
				opModM(op.getAddress(), Reg(ext, Operand::REG, 32), code);
				return;
			}
		}
		XBYAK_THROW(ERR_BAD_COMBINATION)
	}
verifyMemHasSize(const Operand & op)2097 void verifyMemHasSize(const Operand& op) const
2098 {
2099 if (op.isMEM() && op.getBit() == 0) XBYAK_THROW(ERR_MEM_SIZE_IS_NOT_SPECIFIED)
2100 }
	/*
		mov(r, imm) = db(imm, mov_imm(r, imm))
		Emits prefix+opcode for mov reg, imm and returns the byte count of the
		immediate the caller must append. Shrinks 64-bit moves when possible:
		zero-extended imm32 -> mov r32, imm32; sign-extended imm32 -> C7 /0.
	*/
	int mov_imm(const Reg& reg, uint64_t imm)
	{
		int bit = reg.getBit();
		const int idx = reg.getIdx();
		int code = 0xB0 | ((bit == 8 ? 0 : 1) << 3);
		if (bit == 64 && (imm & ~uint64_t(0xffffffffu)) == 0) {
			// upper 32 bits are zero: writing the 32-bit register zero-extends
			rex(Reg32(idx));
			bit = 32;
		} else {
			rex(reg);
			if (bit == 64 && inner::IsInInt32(imm)) {
				db(0xC7); // mov r/m64, imm32 (sign-extended)
				code = 0xC0;
				bit = 32;
			}
		}
		db(code | (idx & 7));
		return bit / 8;
	}
	// Write the address of a label (T = std::string or Label) into the code
	// stream: rel32 when relative, else an absolute pointer-sized value.
	// Undefined labels get a zero placeholder plus a fixup record.
	template<class T>
	void putL_inner(T& label, bool relative = false, size_t disp = 0)
	{
		const int jmpSize = relative ? 4 : (int)sizeof(size_t);
		if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory();
		size_t offset = 0;
		if (labelMgr_.getOffset(&offset, label)) {
			if (relative) {
				db(inner::VerifyInInt32(offset + disp - size_ - jmpSize), jmpSize);
			} else if (isAutoGrow()) {
				// absolute address unknown until getCode(): record top_-relative fixup
				db(uint64_t(0), jmpSize);
				save(size_ - jmpSize, offset, jmpSize, inner::LaddTop);
			} else {
				db(size_t(top_) + offset, jmpSize);
			}
			return;
		}
		db(uint64_t(0), jmpSize);
		JmpLabel jmp(size_, jmpSize, (relative ? inner::LasIs : isAutoGrow() ? inner::LaddTop : inner::Labs), disp);
		labelMgr_.addUndefinedLabel(label, jmp);
	}
	// movsx/movzx: w bit selects the 16-bit source form; source must be narrower
	// than the destination register.
	void opMovxx(const Reg& reg, const Operand& op, uint8_t code)
	{
		if (op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION)
		int w = op.isBit(16);
#ifdef XBYAK64
		// ah/bh/ch/dh cannot be encoded together with a REX prefix
		if (op.isHigh8bit()) XBYAK_THROW(ERR_BAD_COMBINATION)
#endif
		bool cond = reg.isREG() && (reg.getBit() > op.getBit());
		opModRM(reg, op, cond && op.isREG(), cond && op.isMEM(), 0x0F, code | w);
	}
	// x87 memory op: pick the opcode by operand size; m64ext overrides the
	// opcode extension for the 64-bit form when nonzero.
	void opFpuMem(const Address& addr, uint8_t m16, uint8_t m32, uint8_t m64, uint8_t ext, uint8_t m64ext)
	{
		if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
		uint8_t code = addr.isBit(16) ? m16 : addr.isBit(32) ? m32 : addr.isBit(64) ? m64 : 0;
		if (!code) XBYAK_THROW(ERR_BAD_MEM_SIZE)
		if (m64ext && addr.isBit(64)) ext = m64ext;

		rex(addr, st0);
		db(code);
		opAddr(addr, ext);
	}
	// use code1 if reg1 == st0
	// use code2 if reg1 != st0 && reg2 == st0
	void opFpuFpu(const Fpu& reg1, const Fpu& reg2, uint32_t code1, uint32_t code2)
	{
		uint32_t code = reg1.getIdx() == 0 ? code1 : reg2.getIdx() == 0 ? code2 : 0;
		if (!code) XBYAK_THROW(ERR_BAD_ST_COMBINATION)
		db(uint8_t(code >> 8)); // codeN packs both opcode bytes: high = first byte
		db(uint8_t(code | (reg1.getIdx() | reg2.getIdx()))); // one idx is 0, the other selects st(i)
	}
	// two-byte x87 op on st(i): second byte gets the register index or-ed in
	void opFpu(const Fpu& reg, uint8_t code1, uint8_t code2)
	{
		db(code1); db(code2 | reg.getIdx());
	}
	// Core AVX/AVX-512 encoder: choose VEX or EVEX prefix, then emit
	// opcode + ModRM (+addressing) (+imm8). EVEX is forced by the type flags,
	// by any extended/masked/rounded operand, or by broadcast/opmask on memory.
	void opVex(const Reg& r, const Operand *p1, const Operand& op2, int type, int code, int imm8 = NONE)
	{
		if (op2.isMEM()) {
			const Address& addr = op2.getAddress();
			const RegExp& regExp = addr.getRegExp();
			const Reg& base = regExp.getBase();
			const Reg& index = regExp.getIndex();
			if (BIT == 64 && addr.is32bit()) db(0x67); // address-size override
			int disp8N = 0;
			bool x = index.isExtIdx();
			if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
				int aaa = addr.getOpmaskIdx();
				if (aaa && !(type & T_M_K)) XBYAK_THROW(ERR_INVALID_OPMASK_WITH_MEMORY)
				bool b = false;
				if (addr.isBroadcast()) {
					if (!(type & (T_B32 | T_B64))) XBYAK_THROW(ERR_INVALID_BROADCAST)
					b = true;
				}
				int VL = regExp.isVsib() ? index.getBit() : 0;
				// evex() returns the compressed-disp8 scale for setSIB
				disp8N = evex(r, base, p1, type, code, x, b, aaa, VL, index.isExtIdx2());
			} else {
				vex(r, base, p1, type, code, x);
			}
			opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N, (type & T_VSIB) != 0);
		} else {
			const Reg& base = op2.getReg();
			if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) {
				evex(r, base, p1, type, code);
			} else {
				vex(r, base, p1, type, code);
			}
			setModRM(3, r.getIdx(), base.getIdx());
		}
		if (imm8 != NONE) db(imm8);
	}
	// (r, r, r/m) if isR_R_RM
	// (r, r/m, r)
	// VEX-encoded GPR ops (bmi etc.): all register operands must match r's width;
	// W bit follows the operand size.
	void opGpr(const Reg32e& r, const Operand& op1, const Operand& op2, int type, uint8_t code, bool isR_R_RM, int imm8 = NONE)
	{
		const Operand *p1 = &op1;
		const Operand *p2 = &op2;
		if (!isR_R_RM) std::swap(p1, p2);
		const unsigned int bit = r.getBit();
		if (p1->getBit() != bit || (p2->isREG() && p2->getBit() != bit)) XBYAK_THROW(ERR_BAD_COMBINATION)
		type |= (bit == 64) ? T_W1 : T_W0;
		opVex(r, p1, *p2, type, code, imm8);
	}
	// three-operand AVX form; a two-operand call (op2 empty) duplicates x1 as src1
	void opAVX_X_X_XM(const Xmm& x1, const Operand& op1, const Operand& op2, int type, int code0, int imm8 = NONE)
	{
		const Xmm *x2 = static_cast<const Xmm*>(&op1);
		const Operand *op = &op2;
		if (op2.isNone()) { // (x1, op1) -> (x1, x1, op1)
			x2 = &x1;
			op = &op1;
		}
		// (x1, x2, op)
		if (!((x1.isXMM() && x2->isXMM()) || ((type & T_YMM) && ((x1.isYMM() && x2->isYMM()) || (x1.isZMM() && x2->isZMM()))))) XBYAK_THROW(ERR_BAD_COMBINATION)
		opVex(x1, x2, *op, type, code0, imm8);
	}
	// opmask-destination compares (vcmpps k, x, x/m etc.)
	void opAVX_K_X_XM(const Opmask& k, const Xmm& x2, const Operand& op3, int type, int code0, int imm8 = NONE)
	{
		if (!op3.isMEM() && (x2.getKind() != op3.getKind())) XBYAK_THROW(ERR_BAD_COMBINATION)
		opVex(k, &x2, op3, type, code0, imm8);
	}
	// (x, x/m), (y, x/m256), (z, y/m)
	void checkCvt1(const Operand& x, const Operand& op) const
	{
		if (!op.isMEM() && !(x.is(Operand::XMM | Operand::YMM) && op.isXMM()) && !(x.isZMM() && op.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION)
	}
	// (x, x/m), (x, y/m256), (y, z/m)
	void checkCvt2(const Xmm& x, const Operand& op) const
	{
		if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION)
	}
	// narrowing conversions: widen the destination's kind to match the source width
	void opCvt(const Xmm& x, const Operand& op, int type, int code)
	{
		Operand::Kind kind = x.isXMM() ? (op.isBit(256) ? Operand::YMM : Operand::XMM) : Operand::ZMM;
		opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
	}
	// opCvt with the checkCvt2 shape validated first
	void opCvt2(const Xmm& x, const Operand& op, int type, int code)
	{
		checkCvt2(x, op);
		opCvt(x, op, type, code);
	}
	// xmm <-> GPR conversions (cvtsi2ss etc.): type64/type32 select the W bit
	void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int type64, int type32, uint8_t code)
	{
		if (!(x1.isXMM() && x2.isXMM() && (op.isREG(i32e) || op.isMEM()))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
		Xmm x(op.getIdx()); // reg operand encoded through an Xmm with the same index
		const Operand *p = op.isREG() ? &x : &op;
		opVex(x1, &x2, *p, type | (op.isBit(64) ? type64 : type32), code);
	}
	// (x, x/y/xword/yword), (y, z/m)
	void checkCvt4(const Xmm& x, const Operand& op) const
	{
		if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM) && op.isBit(128|256)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION)
	}
	// (x, x/y/z/xword/yword/zword)
	void opCvt5(const Xmm& x, const Operand& op, int type, int code)
	{
		if (!(x.isXMM() && op.isBit(128|256|512))) XBYAK_THROW(ERR_BAD_COMBINATION)
		Operand::Kind kind = op.isBit(128) ? Operand::XMM : op.isBit(256) ? Operand::YMM : Operand::ZMM;
		opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
	}
	// register 0 of the same kind (xmm0/ymm0/zmm0), used as a dummy vvvv operand
	const Xmm& cvtIdx0(const Operand& x) const
	{
		return x.isZMM() ? zm0 : x.isYMM() ? ym0 : xm0;
	}
	// support (x, x/m, imm), (y, y/m, imm)
	void opAVX_X_XM_IMM(const Xmm& x, const Operand& op, int type, int code, int imm8 = NONE)
	{
		opAVX_X_X_XM(x, cvtIdx0(x), op, type, code, imm8);
	}
	// QQQ:need to refactor
	// popcnt/lzcnt/tzcnt style: mandatory prefix + 0x0F map; 16-bit form
	// gets a 0x66 operand-size override.
	void opSp1(const Reg& reg, const Operand& op, uint8_t pref, uint8_t code0, uint8_t code1)
	{
		if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
		bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM());
		if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION)
		if (is16bit) db(0x66);
		db(pref); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, code0, code1);
	}
	// AVX2 gather (vgatherdps etc.): mode fixes the allowed dst/index/mask width
	// combination, and all three registers must be distinct.
	void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8_t code, int mode)
	{
		const RegExp& regExp = addr.getRegExp();
		if (!regExp.isVsib(128 | 256)) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
		const int y_vx_y = 0;
		const int y_vy_y = 1;
		// const int x_vy_x = 2;
		const bool isAddrYMM = regExp.getIndex().getBit() == 256;
		if (!x1.isXMM() || isAddrYMM || !x2.isXMM()) {
			bool isOK = false;
			if (mode == y_vx_y) {
				isOK = x1.isYMM() && !isAddrYMM && x2.isYMM();
			} else if (mode == y_vy_y) {
				isOK = x1.isYMM() && isAddrYMM && x2.isYMM();
			} else { // x_vy_x
				isOK = !x1.isYMM() && isAddrYMM && !x2.isYMM();
			}
			if (!isOK) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
		}
		int i1 = x1.getIdx();
		int i2 = regExp.getIndex().getIdx();
		int i3 = x2.getIdx();
		// dst, index and mask must not alias (hardware requirement)
		if (i1 == i2 || i1 == i3 || i2 == i3) XBYAK_THROW(ERR_SAME_REGS_ARE_INVALID);
		opAVX_X_X_XM(isAddrYMM ? Ymm(i1) : x1, isAddrYMM ? Ymm(i3) : x2, addr, type, code);
	}
	// dst-kind / index-kind pairing modes for AVX-512 gather/scatter
	enum {
		xx_yy_zz = 0,
		xx_yx_zy = 1,
		xx_xy_yz = 2
	};
	// validate that the dst register kind matches the VSIB index kind for `mode`
	void checkGather2(const Xmm& x1, const Reg& x2, int mode) const
	{
		if (x1.isXMM() && x2.isXMM()) return;
		switch (mode) {
		case xx_yy_zz: if ((x1.isYMM() && x2.isYMM()) || (x1.isZMM() && x2.isZMM())) return;
			break;
		case xx_yx_zy: if ((x1.isYMM() && x2.isXMM()) || (x1.isZMM() && x2.isYMM())) return;
			break;
		case xx_xy_yz: if ((x1.isXMM() && x2.isYMM()) || (x1.isYMM() && x2.isZMM())) return;
			break;
		}
		XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
	}
	// AVX-512 gather/scatter: requires a non-k0 opmask and, for gathers,
	// dst must differ from the index register
	void opGather2(const Xmm& x, const Address& addr, int type, uint8_t code, int mode)
	{
		if (x.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO)
		const RegExp& regExp = addr.getRegExp();
		checkGather2(x, regExp.getIndex(), mode);
		int maskIdx = x.getOpmaskIdx();
		if ((type & T_M_K) && addr.getOpmaskIdx()) maskIdx = addr.getOpmaskIdx();
		if (maskIdx == 0) XBYAK_THROW(ERR_K0_IS_INVALID);
		if (!(type & T_M_K) && x.getIdx() == regExp.getIndex().getIdx()) XBYAK_THROW(ERR_SAME_REGS_ARE_INVALID);
		opVex(x, 0, addr, type, code);
	}
	/*
		down-converting moves (vpmov*):
		xx_xy_yz ; mode = true
		xx_xy_xz ; mode = false
	*/
	void opVmov(const Operand& op, const Xmm& x, int type, uint8_t code, bool mode)
	{
		if (mode) {
			if (!op.isMEM() && !((op.isXMM() && x.isXMM()) || (op.isXMM() && x.isYMM()) || (op.isYMM() && x.isZMM()))) XBYAK_THROW(ERR_BAD_COMBINATION)
		} else {
			if (!op.isMEM() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION)
		}
		opVex(x, 0, op, type, code);
	}
	// gather prefetch (vgatherpf*): index must be the given kind; no {z} allowed
	void opGatherFetch(const Address& addr, const Xmm& x, int type, uint8_t code, Operand::Kind kind)
	{
		if (addr.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO)
		if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
		opVex(x, 0, addr, type, code);
	}
	// VNNI ops selectable between VEX (AVX-VNNI) and EVEX (AVX512-VNNI) encodings
	void opVnni(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int code0, PreferredEncoding encoding)
	{
		if (encoding == DefaultEncoding) {
			encoding = EvexEncoding;
		}
		if (encoding == EvexEncoding) {
#ifdef XBYAK_DISABLE_AVX512
			XBYAK_THROW(ERR_EVEX_IS_INVALID)
#endif
			type |= T_MUST_EVEX;
		}
		opAVX_X_X_XM(x1, x2, op, type, code0);
	}
	// in/out with port in dx: accumulator must be al/ax/eax and port reg must be dx
	void opInOut(const Reg& a, const Reg& d, uint8_t code)
	{
		if (a.getIdx() == Operand::AL && d.getIdx() == Operand::DX && d.getBit() == 16) {
			switch (a.getBit()) {
			case 8: db(code); return;
			case 16: db(0x66); db(code + 1); return; // operand-size override
			case 32: db(code + 1); return;
			}
		}
		XBYAK_THROW(ERR_BAD_COMBINATION)
	}
	// in/out with immediate port v: accumulator must be al/ax/eax
	void opInOut(const Reg& a, uint8_t code, uint8_t v)
	{
		if (a.getIdx() == Operand::AL) {
			switch (a.getBit()) {
			case 8: db(code); db(v); return;
			case 16: db(0x66); db(code + 1); db(v); return;
			case 32: db(code + 1); db(v); return;
			}
		}
		XBYAK_THROW(ERR_BAD_COMBINATION)
	}
#ifdef XBYAK64
	// AMX tile load/store: the SIB form with both base and index is mandatory
	void opAMX(const Tmm& t1, const Address& addr, int type, int code0)
	{
		// require both base and index
		const RegExp exp = addr.getRegExp(false);
		if (exp.getBase().getBit() == 0 || exp.getIndex().getBit() == 0) XBYAK_THROW(ERR_NOT_SUPPORTED)
		opVex(t1, &tmm0, addr, type, code0);
	}
#endif
public:
	unsigned int getVersion() const { return VERSION; }
	using CodeArray::db;
	// pre-constructed operand objects so users can write e.g. mov(eax, ptr[rsp])
	const Mmx mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
	const Xmm xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
	const Ymm ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7;
	const Zmm zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7;
	const Xmm &xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7; // short aliases
	const Ymm &ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7;
	const Zmm &zm0, &zm1, &zm2, &zm3, &zm4, &zm5, &zm6, &zm7;
	const Reg32 eax, ecx, edx, ebx, esp, ebp, esi, edi;
	const Reg16 ax, cx, dx, bx, sp, bp, si, di;
	const Reg8 al, cl, dl, bl, ah, ch, dh, bh;
	const AddressFrame ptr, byte, word, dword, qword, xword, yword, zword; // xword is same as oword of NASM
	const AddressFrame ptr_b, xword_b, yword_b, zword_b; // broadcast such as {1to2}, {1to4}, {1to8}, {1to16}, {b}
	const Fpu st0, st1, st2, st3, st4, st5, st6, st7;
	const Opmask k0, k1, k2, k3, k4, k5, k6, k7;
	const BoundsReg bnd0, bnd1, bnd2, bnd3;
	const EvexModifierRounding T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_sae; // {sae}, {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae}
	const EvexModifierZero T_z; // {z}
#ifdef XBYAK64
	const Reg64 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
	const Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d;
	const Reg16 r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w;
	const Reg8 r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b;
	const Reg8 spl, bpl, sil, dil;
	const Xmm xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
	const Xmm xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23;
	const Xmm xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31;
	const Ymm ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15;
	const Ymm ymm16, ymm17, ymm18, ymm19, ymm20, ymm21, ymm22, ymm23;
	const Ymm ymm24, ymm25, ymm26, ymm27, ymm28, ymm29, ymm30, ymm31;
	const Zmm zmm8, zmm9, zmm10, zmm11, zmm12, zmm13, zmm14, zmm15;
	const Zmm zmm16, zmm17, zmm18, zmm19, zmm20, zmm21, zmm22, zmm23;
	const Zmm zmm24, zmm25, zmm26, zmm27, zmm28, zmm29, zmm30, zmm31;
	const Tmm tmm0, tmm1, tmm2, tmm3, tmm4, tmm5, tmm6, tmm7;
	const Xmm &xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15; // for my convenience
	const Xmm &xm16, &xm17, &xm18, &xm19, &xm20, &xm21, &xm22, &xm23;
	const Xmm &xm24, &xm25, &xm26, &xm27, &xm28, &xm29, &xm30, &xm31;
	const Ymm &ym8, &ym9, &ym10, &ym11, &ym12, &ym13, &ym14, &ym15;
	const Ymm &ym16, &ym17, &ym18, &ym19, &ym20, &ym21, &ym22, &ym23;
	const Ymm &ym24, &ym25, &ym26, &ym27, &ym28, &ym29, &ym30, &ym31;
	const Zmm &zm8, &zm9, &zm10, &zm11, &zm12, &zm13, &zm14, &zm15;
	const Zmm &zm16, &zm17, &zm18, &zm19, &zm20, &zm21, &zm22, &zm23;
	const Zmm &zm24, &zm25, &zm26, &zm27, &zm28, &zm29, &zm30, &zm31;
	const RegRip rip;
#endif
#ifndef XBYAK_DISABLE_SEGMENT
	const Segment es, cs, ss, ds, fs, gs;
#endif
private:
	bool isDefaultJmpNEAR_; // when true, T_AUTO jumps to undefined labels use the near form
public:
	// define a label at the current output position
	void L(const std::string& label) { labelMgr_.defineSlabel(label); }
	void L(Label& label) { labelMgr_.defineClabel(label); }
	// define and return an anonymous label at the current position
	Label L() { Label label; L(label); return label; }
	// enter/leave a scope for '.'-prefixed local label names
	void inLocalLabel() { labelMgr_.enterLocal(); }
	void outLocalLabel() { labelMgr_.leaveLocal(); }
	/*
		assign src to dst
		require
		dst : not yet used by L()
		src : already used by L()
	*/
	void assignL(Label& dst, const Label& src) { labelMgr_.assign(dst, src); }
	/*
		put address of label to buffer
		@note the put size is 4(32-bit), 8(64-bit)
	*/
	void putL(std::string label) { putL_inner(label); }
	void putL(const Label& label) { putL_inner(label); }

	// set default type of `jmp` of undefined label to T_NEAR
	void setDefaultJmpNEAR(bool isNear) { isDefaultJmpNEAR_ = isNear; }
	// jmp variants: through reg/mem (0xFF /4), to labels (EB short / E9 near), or absolute
	void jmp(const Operand& op, LabelType type = T_AUTO) { opJmpOp(op, type, 4); }
	void jmp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
	void jmp(const char *label, LabelType type = T_AUTO) { jmp(std::string(label), type); }
	void jmp(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
	void jmp(const void *addr, LabelType type = T_AUTO) { opJmpAbs(addr, type, 0xEB, 0xE9); }

	// call through reg/mem (0xFF /2); call has no short form, so labels always use E8 rel32
	void call(const Operand& op, LabelType type = T_AUTO) { opJmpOp(op, type, 2); }
	// call(string label), not const std::string&
	void call(std::string label) { opJmp(label, T_NEAR, 0, 0xE8, 0); }
	void call(const char *label) { call(std::string(label)); }
	void call(const Label& label) { opJmp(label, T_NEAR, 0, 0xE8, 0); }
	// call(function pointer)
#ifdef XBYAK_VARIADIC_TEMPLATE
	template<class Ret, class... Params>
	void call(Ret(*func)(Params...)) { call(reinterpret_cast<const void*>(func)); }
#endif
	void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, 0xE8); }
2507
	// test r/m, r (0x84/0x85)
	void test(const Operand& op, const Reg& reg)
	{
		opModRM(reg, op, op.isREG() && (op.getKind() == reg.getKind()), op.isMEM(), 0x84);
	}
	// test r/m, imm: short accumulator form (0xA8) for al/ax/eax, else 0xF6 /0
	void test(const Operand& op, uint32_t imm)
	{
		verifyMemHasSize(op);
		int immSize = (std::min)(op.getBit() / 8, 4U);
		if (op.isREG() && op.getIdx() == 0) { // al, ax, eax
			rex(op);
			db(0xA8 | (op.isBit(8) ? 0 : 1));
		} else {
			opR_ModM(op, 0, 0, 0xF6, NONE, NONE, false, immSize);
		}
		db(imm, immSize);
	}
	// imul reg, r/m (0x0F 0xAF)
	void imul(const Reg& reg, const Operand& op)
	{
		opModRM(reg, op, op.isREG() && (reg.getKind() == op.getKind()), op.isMEM(), 0x0F, 0xAF);
	}
	// imul reg, r/m, imm: 0x6B with sign-extended imm8 when it fits, else 0x69 + imm16/32
	void imul(const Reg& reg, const Operand& op, int imm)
	{
		int s = inner::IsInDisp8(imm) ? 1 : 0;
		int immSize = s ? 1 : reg.isREG(16) ? 2 : 4;
		opModRM(reg, op, op.isREG() && (reg.getKind() == op.getKind()), op.isMEM(), 0x69 | (s << 1), NONE, NONE, immSize);
		db(imm, immSize);
	}
	void push(const Operand& op) { opPushPop(op, 0xFF, 6, 0x50); }
	void pop(const Operand& op) { opPushPop(op, 0x8F, 0, 0x58); }
	// push imm with an explicit size frame: 0x6A imm8, 0x68 imm16 (with 0x66), 0x68 imm32
	void push(const AddressFrame& af, uint32_t imm)
	{
		if (af.bit_ == 8) {
			db(0x6A); db(imm);
		} else if (af.bit_ == 16) {
			db(0x66); db(0x68); dw(imm);
		} else {
			db(0x68); dd(imm);
		}
	}
	/* use "push(word, 4)" if you want "push word 4" */
	// otherwise the smallest encoding (imm8 or imm32) is chosen automatically
	void push(uint32_t imm)
	{
		if (inner::IsInDisp8(imm)) {
			push(byte, imm);
		} else {
			push(dword, imm);
		}
	}
// MOV between two operands. Detects the special accumulator<->moffs forms
// (opcodes 0xA0-0xA3) and falls back to the generic r/m encoding otherwise.
void mov(const Operand& reg1, const Operand& reg2)
{
	const Reg *reg = 0;      // set when an accumulator/moffs form applies
	const Address *addr = 0; // the memory side of that form
	uint8_t code = 0;        // 0xA0 = load from [disp], 0xA2 = store to [disp]
	if (reg1.isREG() && reg1.getIdx() == 0 && reg2.isMEM()) { // mov eax|ax|al, [disp]
		reg = &reg1.getReg();
		addr= &reg2.getAddress();
		code = 0xA0;
	} else
	if (reg1.isMEM() && reg2.isREG() && reg2.getIdx() == 0) { // mov [disp], eax|ax|al
		reg = &reg2.getReg();
		addr= &reg1.getAddress();
		code = 0xA2;
	}
#ifdef XBYAK64
	// x64: the moffs form is only used with a full 64-bit displacement
	if (addr && addr->is64bitDisp()) {
		if (code) {
			rex(*reg);
			db(reg1.isREG(8) ? 0xA0 : reg1.isREG() ? 0xA1 : reg2.isREG(8) ? 0xA2 : 0xA3);
			db(addr->getDisp(), 8);
		} else {
			XBYAK_THROW(ERR_BAD_COMBINATION)
		}
	} else
#else
	// 32-bit: use the moffs form whenever the address is a bare displacement
	// (code != 0 guarantees addr is non-null here)
	if (code && addr->isOnlyDisp()) {
		rex(*reg, *addr);
		db(code | (reg->isBit(8) ? 0 : 1));
		dd(static_cast<uint32_t>(addr->getDisp()));
	} else
#endif
	{
		// generic MOV r/m, r / MOV r, r/m (0x88 family)
		opRM_RM(reg1, reg2, 0x88);
	}
}
mov(const Operand & op,uint64_t imm)2592 void mov(const Operand& op, uint64_t imm)
2593 {
2594 if (op.isREG()) {
2595 const int size = mov_imm(op.getReg(), imm);
2596 db(imm, size);
2597 } else if (op.isMEM()) {
2598 verifyMemHasSize(op);
2599 int immSize = op.getBit() / 8;
2600 if (immSize <= 4) {
2601 int64_t s = int64_t(imm) >> (immSize * 8);
2602 if (s != 0 && s != -1) XBYAK_THROW(ERR_IMM_IS_TOO_BIG)
2603 } else {
2604 if (!inner::IsInInt32(imm)) XBYAK_THROW(ERR_IMM_IS_TOO_BIG)
2605 immSize = 4;
2606 }
2607 opModM(op.getAddress(), Reg(0, Operand::REG, op.getBit()), 0xC6, NONE, NONE, immSize);
2608 db(static_cast<uint32_t>(imm), immSize);
2609 } else {
2610 XBYAK_THROW(ERR_BAD_COMBINATION)
2611 }
2612 }
2613
// The template is used to avoid ambiguity when the 2nd argument is 0.
// When the 2nd argument is 0 the call goes to
// `void mov(const Operand& op, uint64_t imm)`.
// Any other pointer argument is rejected: referencing the (non-existent)
// member T1::unexpected forces a compile-time error when instantiated.
template <typename T1, typename T2>
void mov(const T1&, const T2 *) { T1::unexpected; }
mov(const NativeReg & reg,const Label & label)2619 void mov(const NativeReg& reg, const Label& label)
2620 {
2621 mov_imm(reg, dummyAddr);
2622 putL(label);
2623 }
// XCHG op1, op2. Canonicalizes the operand order, then uses the one-byte
// 0x90|r form when one side is the (16/32/64-bit) accumulator, otherwise the
// generic 0x86/0x87 r/m form.
void xchg(const Operand& op1, const Operand& op2)
{
	const Operand *p1 = &op1, *p2 = &op2;
	// swap so that any memory operand ends up in p1 and an accumulator in p1
	if (p1->isMEM() || (p2->isREG(16 | i32e) && p2->getIdx() == 0)) {
		p1 = &op2; p2 = &op1;
	}
	if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION)
	if (p2->isREG() && (p1->isREG(16 | i32e) && p1->getIdx() == 0)
#ifdef XBYAK64
		// NOTE(review): excludes "xchg eax, eax" from the short form on x64 —
		// 0x90 is NOP there, which would skip the zero-extension; confirm
		&& (p2->getIdx() != 0 || !p1->isREG(32))
#endif
	) {
		// short form: 0x90 | reg
		rex(*p2, *p1); db(0x90 | (p2->getIdx() & 7));
		return;
	}
	opModRM(*p1, *p2, (p1->isREG() && p2->isREG() && (p1->getBit() == p2->getBit())), p2->isMEM(), 0x86 | (p1->isBit(8) ? 0 : 1));
}
2641
2642 #ifndef XBYAK_DISABLE_SEGMENT
// PUSH sreg: es/cs/ss/ds have one-byte opcodes; fs/gs need the 0x0F escape.
void push(const Segment& seg)
{
	switch (seg.getIdx()) {
	case Segment::es: db(0x06); break;
	case Segment::cs: db(0x0E); break;
	case Segment::ss: db(0x16); break;
	case Segment::ds: db(0x1E); break;
	case Segment::fs: db(0x0F); db(0xA0); break;
	case Segment::gs: db(0x0F); db(0xA8); break;
	default:
		assert(0);
	}
}
// POP sreg: one-byte opcodes for es/ss/ds, 0x0F escape for fs/gs.
// POP cs does not exist on x86, hence the error.
void pop(const Segment& seg)
{
	switch (seg.getIdx()) {
	case Segment::es: db(0x07); break;
	case Segment::cs: XBYAK_THROW(ERR_BAD_COMBINATION)
	case Segment::ss: db(0x17); break;
	case Segment::ds: db(0x1F); break;
	case Segment::fs: db(0x0F); db(0xA1); break;
	case Segment::gs: db(0x0F); db(0xA9); break;
	default:
		assert(0);
	}
}
putSeg(const Segment & seg)2669 void putSeg(const Segment& seg)
2670 {
2671 switch (seg.getIdx()) {
2672 case Segment::es: db(0x2E); break;
2673 case Segment::cs: db(0x36); break;
2674 case Segment::ss: db(0x3E); break;
2675 case Segment::ds: db(0x26); break;
2676 case Segment::fs: db(0x64); break;
2677 case Segment::gs: db(0x65); break;
2678 default:
2679 assert(0);
2680 }
2681 }
mov(const Operand & op,const Segment & seg)2682 void mov(const Operand& op, const Segment& seg)
2683 {
2684 opModRM(Reg8(seg.getIdx()), op, op.isREG(16|i32e), op.isMEM(), 0x8C);
2685 }
// MOV Sreg, r/m (0x8E). A 16-bit register source is converted to its 32-bit
// alias via cvt32() — NOTE(review): presumably to avoid a redundant 0x66
// operand-size prefix; confirm against the encoder.
void mov(const Segment& seg, const Operand& op)
{
	opModRM(Reg8(seg.getIdx()), op.isREG(16|i32e) ? static_cast<const Operand&>(op.getReg().cvt32()) : op, op.isREG(16|i32e), op.isMEM(), 0x8E);
}
2690 #endif
2691
enum { NONE = 256 }; // sentinel above any byte value (0..0xFF): "no such opcode/immediate byte"
// constructor
// Allocates the code buffer (via CodeArray) and initializes every predefined
// register/operand member so users can write e.g. mov(eax, 1) directly.
CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0, Allocator *allocator = 0)
	: CodeArray(maxSize, userPtr, allocator)
	, mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7)
	, xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7)
	, ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7)
	, zmm0(0), zmm1(1), zmm2(2), zmm3(3), zmm4(4), zmm5(5), zmm6(6), zmm7(7)
	// for my convenience
	, xm0(xmm0), xm1(xmm1), xm2(xmm2), xm3(xmm3), xm4(xmm4), xm5(xmm5), xm6(xmm6), xm7(xmm7)
	, ym0(ymm0), ym1(ymm1), ym2(ymm2), ym3(ymm3), ym4(ymm4), ym5(ymm5), ym6(ymm6), ym7(ymm7)
	, zm0(zmm0), zm1(zmm1), zm2(zmm2), zm3(zmm3), zm4(zmm4), zm5(zmm5), zm6(zmm6), zm7(zmm7)

	, eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI)
	, ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI)
	, al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH)
	, ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512)
	, ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true)
	, st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7)
	, k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7)
	, bnd0(0), bnd1(1), bnd2(2), bnd3(3)
	, T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE)
	, T_z()
#ifdef XBYAK64
	, rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15)
	, r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15)
	, r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15)
	, r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15)
	, spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true)
	, xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15)
	, xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23)
	, xmm24(24), xmm25(25), xmm26(26), xmm27(27), xmm28(28), xmm29(29), xmm30(30), xmm31(31)
	, ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15)
	, ymm16(16), ymm17(17), ymm18(18), ymm19(19), ymm20(20), ymm21(21), ymm22(22), ymm23(23)
	, ymm24(24), ymm25(25), ymm26(26), ymm27(27), ymm28(28), ymm29(29), ymm30(30), ymm31(31)
	, zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15)
	, zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23)
	, zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31)
	, tmm0(0), tmm1(1), tmm2(2), tmm3(3), tmm4(4), tmm5(5), tmm6(6), tmm7(7)
	// for my convenience
	, xm8(xmm8), xm9(xmm9), xm10(xmm10), xm11(xmm11), xm12(xmm12), xm13(xmm13), xm14(xmm14), xm15(xmm15)
	, xm16(xmm16), xm17(xmm17), xm18(xmm18), xm19(xmm19), xm20(xmm20), xm21(xmm21), xm22(xmm22), xm23(xmm23)
	, xm24(xmm24), xm25(xmm25), xm26(xmm26), xm27(xmm27), xm28(xmm28), xm29(xmm29), xm30(xmm30), xm31(xmm31)
	, ym8(ymm8), ym9(ymm9), ym10(ymm10), ym11(ymm11), ym12(ymm12), ym13(ymm13), ym14(ymm14), ym15(ymm15)
	, ym16(ymm16), ym17(ymm17), ym18(ymm18), ym19(ymm19), ym20(ymm20), ym21(ymm21), ym22(ymm22), ym23(ymm23)
	, ym24(ymm24), ym25(ymm25), ym26(ymm26), ym27(ymm27), ym28(ymm28), ym29(ymm29), ym30(ymm30), ym31(ymm31)
	, zm8(zmm8), zm9(zmm9), zm10(zmm10), zm11(zmm11), zm12(zmm12), zm13(zmm13), zm14(zmm14), zm15(zmm15)
	, zm16(zmm16), zm17(zmm17), zm18(zmm18), zm19(zmm19), zm20(zmm20), zm21(zmm21), zm22(zmm22), zm23(zmm23)
	, zm24(zmm24), zm25(zmm25), zm26(zmm26), zm27(zmm27), zm28(zmm28), zm29(zmm29), zm30(zmm30), zm31(zmm31)
	, rip()
#endif
#ifndef XBYAK_DISABLE_SEGMENT
	, es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs)
#endif
	, isDefaultJmpNEAR_(false)
{
	// labels need a back-pointer to the generator that owns the code buffer
	labelMgr_.set(this);
}
reset()2750 void reset()
2751 {
2752 resetSize();
2753 labelMgr_.reset();
2754 labelMgr_.set(this);
2755 }
hasUndefinedLabel()2756 bool hasUndefinedLabel() const { return labelMgr_.hasUndefSlabel() || labelMgr_.hasUndefClabel(); }
2757 /*
2758 MUST call ready() to complete generating code if you use AutoGrow mode.
2759 It is not necessary for the other mode if hasUndefinedLabel() is true.
2760 */
2761 void ready(ProtectMode mode = PROTECT_RWE)
2762 {
2763 if (hasUndefinedLabel()) XBYAK_THROW(ERR_LABEL_IS_NOT_FOUND)
2764 if (isAutoGrow()) {
2765 calcJmpAddress();
2766 if (useProtect()) setProtectMode(mode);
2767 }
2768 }
2769 // set read/exec
readyRE()2770 void readyRE() { return ready(PROTECT_RE); }
2771 #ifdef XBYAK_TEST
2772 void dump(bool doClear = true)
2773 {
2774 CodeArray::dump();
2775 if (doClear) size_ = 0;
2776 }
2777 #endif
2778
2779 #ifdef XBYAK_UNDEF_JNL
2780 #undef jnl
2781 #endif
2782
2783 /*
2784 use single byte nop if useMultiByteNop = false
2785 */
2786 void nop(size_t size = 1, bool useMultiByteNop = true)
2787 {
2788 if (!useMultiByteNop) {
2789 for (size_t i = 0; i < size; i++) {
2790 db(0x90);
2791 }
2792 return;
2793 }
2794 /*
2795 Intel Architectures Software Developer's Manual Volume 2
2796 recommended multi-byte sequence of NOP instruction
2797 AMD and Intel seem to agree on the same sequences for up to 9 bytes:
2798 https://support.amd.com/TechDocs/55723_SOG_Fam_17h_Processors_3.00.pdf
2799 */
2800 static const uint8_t nopTbl[9][9] = {
2801 {0x90},
2802 {0x66, 0x90},
2803 {0x0F, 0x1F, 0x00},
2804 {0x0F, 0x1F, 0x40, 0x00},
2805 {0x0F, 0x1F, 0x44, 0x00, 0x00},
2806 {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
2807 {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
2808 {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
2809 {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
2810 };
2811 const size_t n = sizeof(nopTbl) / sizeof(nopTbl[0]);
2812 while (size > 0) {
2813 size_t len = (std::min)(n, size);
2814 const uint8_t *seq = nopTbl[len - 1];
2815 db(seq, len);
2816 size -= len;
2817 }
2818 }
2819
2820 #ifndef XBYAK_DONT_READ_LIST
2821 #include "xbyak_mnemonic.h"
2822 /*
2823 use single byte nop if useMultiByteNop = false
2824 */
2825 void align(size_t x = 16, bool useMultiByteNop = true)
2826 {
2827 if (x == 1) return;
2828 if (x < 1 || (x & (x - 1))) XBYAK_THROW(ERR_BAD_ALIGN)
2829 if (isAutoGrow() && x > inner::ALIGN_PAGE_SIZE) fprintf(stderr, "warning:autoGrow mode does not support %d align\n", (int)x);
2830 size_t remain = size_t(getCurr()) % x;
2831 if (remain) {
2832 nop(x - remain, useMultiByteNop);
2833 }
2834 }
2835 #endif
2836 };
2837
2838 template <>
mov(const NativeReg & reg,const char * label)2839 inline void CodeGenerator::mov(const NativeReg& reg, const char *label) // can't use std::string
2840 {
2841 assert(label);
2842 mov_imm(reg, dummyAddr);
2843 putL(label);
2844 }
2845
2846 namespace util {
2847 static const XBYAK_CONSTEXPR Mmx mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7);
2848 static const XBYAK_CONSTEXPR Xmm xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7);
2849 static const XBYAK_CONSTEXPR Ymm ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7);
2850 static const XBYAK_CONSTEXPR Zmm zmm0(0), zmm1(1), zmm2(2), zmm3(3), zmm4(4), zmm5(5), zmm6(6), zmm7(7);
2851 static const XBYAK_CONSTEXPR Reg32 eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI);
2852 static const XBYAK_CONSTEXPR Reg16 ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI);
2853 static const XBYAK_CONSTEXPR Reg8 al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH);
2854 static const XBYAK_CONSTEXPR AddressFrame ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512);
2855 static const XBYAK_CONSTEXPR AddressFrame ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true);
2856 static const XBYAK_CONSTEXPR Fpu st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7);
2857 static const XBYAK_CONSTEXPR Opmask k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7);
2858 static const XBYAK_CONSTEXPR BoundsReg bnd0(0), bnd1(1), bnd2(2), bnd3(3);
2859 static const XBYAK_CONSTEXPR EvexModifierRounding T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE);
2860 static const XBYAK_CONSTEXPR EvexModifierZero T_z;
2861 #ifdef XBYAK64
2862 static const XBYAK_CONSTEXPR Reg64 rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15);
2863 static const XBYAK_CONSTEXPR Reg32 r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15);
2864 static const XBYAK_CONSTEXPR Reg16 r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15);
2865 static const XBYAK_CONSTEXPR Reg8 r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15), spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true);
2866 static const XBYAK_CONSTEXPR Xmm xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15);
2867 static const XBYAK_CONSTEXPR Xmm xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23);
2868 static const XBYAK_CONSTEXPR Xmm xmm24(24), xmm25(25), xmm26(26), xmm27(27), xmm28(28), xmm29(29), xmm30(30), xmm31(31);
2869 static const XBYAK_CONSTEXPR Ymm ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15);
2870 static const XBYAK_CONSTEXPR Ymm ymm16(16), ymm17(17), ymm18(18), ymm19(19), ymm20(20), ymm21(21), ymm22(22), ymm23(23);
2871 static const XBYAK_CONSTEXPR Ymm ymm24(24), ymm25(25), ymm26(26), ymm27(27), ymm28(28), ymm29(29), ymm30(30), ymm31(31);
2872 static const XBYAK_CONSTEXPR Zmm zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15);
2873 static const XBYAK_CONSTEXPR Zmm zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23);
2874 static const XBYAK_CONSTEXPR Zmm zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31);
2875 static const XBYAK_CONSTEXPR Zmm tmm0(0), tmm1(1), tmm2(2), tmm3(3), tmm4(4), tmm5(5), tmm6(6), tmm7(7);
2876 static const XBYAK_CONSTEXPR RegRip rip;
2877 #endif
2878 #ifndef XBYAK_DISABLE_SEGMENT
2879 static const XBYAK_CONSTEXPR Segment es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs);
2880 #endif
2881 } // util
2882
2883 #ifdef _MSC_VER
2884 #pragma warning(pop)
2885 #endif
2886
2887 } // end of namespace
2888
2889 #endif // XBYAK_XBYAK_H_
2890