1 #pragma once
2 #ifndef XBYAK_XBYAK_H_
3 #define XBYAK_XBYAK_H_
4 /*!
5 	@file xbyak.h
6 	@brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++
7 	@author herumi
8 	@url https://github.com/herumi/xbyak
9 	@note modified new BSD license
10 	http://opensource.org/licenses/BSD-3-Clause
11 */
12 #if (not +0) && !defined(XBYAK_NO_OP_NAMES) // trick to detect whether 'not' is operator or not
13 	#define XBYAK_NO_OP_NAMES
14 #endif
15 
16 #include <stdio.h> // for debug print
17 #include <assert.h>
18 #include <list>
19 #include <string>
20 #include <algorithm>
21 #ifndef NDEBUG
22 #include <iostream>
23 #endif
24 
25 // #define XBYAK_DISABLE_AVX512
26 
27 #if !defined(XBYAK_USE_MMAP_ALLOCATOR) && !defined(XBYAK_DONT_USE_MMAP_ALLOCATOR)
28 	#define XBYAK_USE_MMAP_ALLOCATOR
29 #endif
30 #if !defined(__GNUC__) || defined(__MINGW32__)
31 	#undef XBYAK_USE_MMAP_ALLOCATOR
32 #endif
33 
34 #ifdef __GNUC__
35 	#define XBYAK_GNUC_PREREQ(major, minor) ((__GNUC__) * 100 + (__GNUC_MINOR__) >= (major) * 100 + (minor))
36 #else
37 	#define XBYAK_GNUC_PREREQ(major, minor) 0
38 #endif
39 
40 // This covers -std=(gnu|c)++(0x|11|1y), -stdlib=libc++, and modern Microsoft.
41 #if ((defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(_LIBCPP_VERSION) ||\
42 	 			 ((__cplusplus >= 201103) || defined(__GXX_EXPERIMENTAL_CXX0X__)))
43 	#include <unordered_set>
44 	#define XBYAK_STD_UNORDERED_SET std::unordered_set
45 	#include <unordered_map>
46 	#define XBYAK_STD_UNORDERED_MAP std::unordered_map
47 	#define XBYAK_STD_UNORDERED_MULTIMAP std::unordered_multimap
48 
49 /*
50 	Clang/llvm-gcc and ICC-EDG in 'GCC-mode' always claim to be GCC 4.2, using
51 	libstdcxx 20070719 (from GCC 4.2.1, the last GPL 2 version).
52 */
53 #elif XBYAK_GNUC_PREREQ(4, 5) || (XBYAK_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || defined(__llvm__)
54 	#include <tr1/unordered_set>
55 	#define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
56 	#include <tr1/unordered_map>
57 	#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
58 	#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
59 
60 #elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (_MSC_VER < 1600)
61 	#include <unordered_set>
62 	#define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
63 	#include <unordered_map>
64 	#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
65 	#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
66 
67 #else
68 	#include <set>
69 	#define XBYAK_STD_UNORDERED_SET std::set
70 	#include <map>
71 	#define XBYAK_STD_UNORDERED_MAP std::map
72 	#define XBYAK_STD_UNORDERED_MULTIMAP std::multimap
73 #endif
74 #ifdef _WIN32
75 	#ifndef WIN32_LEAN_AND_MEAN
76 		#define WIN32_LEAN_AND_MEAN
77 	#endif
78 	#include <windows.h>
79 	#include <malloc.h>
80 	#define XBYAK_TLS __declspec(thread)
81 #elif defined(__GNUC__)
82 	#include <unistd.h>
83 	#include <sys/mman.h>
84 	#include <stdlib.h>
85 	#define XBYAK_TLS __thread
86 #endif
87 #if defined(__APPLE__) && !defined(XBYAK_DONT_USE_MAP_JIT)
88 	#define XBYAK_USE_MAP_JIT
89 	#include <sys/sysctl.h>
90 	#ifndef MAP_JIT
91 		#define MAP_JIT 0x800
92 	#endif
93 #endif
94 #if !defined(_MSC_VER) || (_MSC_VER >= 1600)
95 	#include <stdint.h>
96 #endif
97 
98 #if !defined(MFD_CLOEXEC) // defined only linux 3.17 or later
99 	#undef XBYAK_USE_MEMFD
100 #endif
101 
102 #if defined(_WIN64) || defined(__MINGW64__) || (defined(__CYGWIN__) && defined(__x86_64__))
103 	#define XBYAK64_WIN
104 #elif defined(__x86_64__)
105 	#define XBYAK64_GCC
106 #endif
107 #if !defined(XBYAK64) && !defined(XBYAK32)
108 	#if defined(XBYAK64_GCC) || defined(XBYAK64_WIN)
109 		#define XBYAK64
110 	#else
111 		#define XBYAK32
112 	#endif
113 #endif
114 
115 #if (__cplusplus >= 201103) || (defined(_MSC_VER) && _MSC_VER >= 1800)
116 	#undef XBYAK_TLS
117 	#define XBYAK_TLS thread_local
118 	#define XBYAK_VARIADIC_TEMPLATE
119 	#define XBYAK_NOEXCEPT noexcept
120 #else
121 	#define XBYAK_NOEXCEPT throw()
122 #endif
123 
124 // require c++14 or later
125 // Visual Studio 2017 version 15.0 or later
126 // g++-6 or later
127 #if ((__cplusplus >= 201402L) && !(!defined(__clang__) && defined(__GNUC__) && (__GNUC__ <= 5))) || (defined(_MSC_VER) && _MSC_VER >= 1910)
128 	#define XBYAK_CONSTEXPR constexpr
129 #else
130 	#define XBYAK_CONSTEXPR
131 #endif
132 
133 #ifdef _MSC_VER
134 	#pragma warning(push)
135 	#pragma warning(disable : 4514) /* remove inline function */
136 	#pragma warning(disable : 4786) /* identifier is too long */
137 	#pragma warning(disable : 4503) /* name is too long */
138 	#pragma warning(disable : 4127) /* constant expresison */
139 #endif
140 
141 namespace Xbyak {
142 
143 enum {
144 	DEFAULT_MAX_CODE_SIZE = 4096,
145 	VERSION = 0x6010 /* 0xABCD = A.BC(D) */
146 };
147 
148 #ifndef MIE_INTEGER_TYPE_DEFINED
149 #define MIE_INTEGER_TYPE_DEFINED
150 // for backward compatibility
151 typedef uint64_t uint64;
152 typedef int64_t sint64;
153 typedef uint32_t uint32;
154 typedef uint16_t uint16;
155 typedef uint8_t uint8;
156 #endif
157 
158 #ifndef MIE_ALIGN
159 	#ifdef _MSC_VER
160 		#define MIE_ALIGN(x) __declspec(align(x))
161 	#else
162 		#define MIE_ALIGN(x) __attribute__((aligned(x)))
163 	#endif
164 #endif
165 #ifndef MIE_PACK // for shufps
166 	#define MIE_PACK(x, y, z, w) ((x) * 64 + (y) * 16 + (z) * 4 + (w))
167 #endif
168 
169 enum {
170 	ERR_NONE = 0,
171 	ERR_BAD_ADDRESSING,
172 	ERR_CODE_IS_TOO_BIG,
173 	ERR_BAD_SCALE,
174 	ERR_ESP_CANT_BE_INDEX,
175 	ERR_BAD_COMBINATION,
176 	ERR_BAD_SIZE_OF_REGISTER,
177 	ERR_IMM_IS_TOO_BIG,
178 	ERR_BAD_ALIGN,
179 	ERR_LABEL_IS_REDEFINED,
180 	ERR_LABEL_IS_TOO_FAR,
181 	ERR_LABEL_IS_NOT_FOUND,
182 	ERR_CODE_ISNOT_COPYABLE,
183 	ERR_BAD_PARAMETER,
184 	ERR_CANT_PROTECT,
185 	ERR_CANT_USE_64BIT_DISP,
186 	ERR_OFFSET_IS_TOO_BIG,
187 	ERR_MEM_SIZE_IS_NOT_SPECIFIED,
188 	ERR_BAD_MEM_SIZE,
189 	ERR_BAD_ST_COMBINATION,
190 	ERR_OVER_LOCAL_LABEL, // not used
191 	ERR_UNDER_LOCAL_LABEL,
192 	ERR_CANT_ALLOC,
193 	ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW,
194 	ERR_BAD_PROTECT_MODE,
195 	ERR_BAD_PNUM,
196 	ERR_BAD_TNUM,
197 	ERR_BAD_VSIB_ADDRESSING,
198 	ERR_CANT_CONVERT,
199 	ERR_LABEL_ISNOT_SET_BY_L,
200 	ERR_LABEL_IS_ALREADY_SET_BY_L,
201 	ERR_BAD_LABEL_STR,
202 	ERR_MUNMAP,
203 	ERR_OPMASK_IS_ALREADY_SET,
204 	ERR_ROUNDING_IS_ALREADY_SET,
205 	ERR_K0_IS_INVALID,
206 	ERR_EVEX_IS_INVALID,
207 	ERR_SAE_IS_INVALID,
208 	ERR_ER_IS_INVALID,
209 	ERR_INVALID_BROADCAST,
210 	ERR_INVALID_OPMASK_WITH_MEMORY,
211 	ERR_INVALID_ZERO,
212 	ERR_INVALID_RIP_IN_AUTO_GROW,
213 	ERR_INVALID_MIB_ADDRESS,
214 	ERR_X2APIC_IS_NOT_SUPPORTED,
215 	ERR_NOT_SUPPORTED,
216 	ERR_SAME_REGS_ARE_INVALID,
217 	ERR_INTERNAL // Put it at last.
218 };
219 
ConvertErrorToString(int err)220 inline const char *ConvertErrorToString(int err)
221 {
222 	static const char *errTbl[] = {
223 		"none",
224 		"bad addressing",
225 		"code is too big",
226 		"bad scale",
227 		"esp can't be index",
228 		"bad combination",
229 		"bad size of register",
230 		"imm is too big",
231 		"bad align",
232 		"label is redefined",
233 		"label is too far",
234 		"label is not found",
235 		"code is not copyable",
236 		"bad parameter",
237 		"can't protect",
238 		"can't use 64bit disp(use (void*))",
239 		"offset is too big",
240 		"MEM size is not specified",
241 		"bad mem size",
242 		"bad st combination",
243 		"over local label",
244 		"under local label",
245 		"can't alloc",
246 		"T_SHORT is not supported in AutoGrow",
247 		"bad protect mode",
248 		"bad pNum",
249 		"bad tNum",
250 		"bad vsib addressing",
251 		"can't convert",
252 		"label is not set by L()",
253 		"label is already set by L()",
254 		"bad label string",
255 		"err munmap",
256 		"opmask is already set",
257 		"rounding is already set",
258 		"k0 is invalid",
259 		"evex is invalid",
260 		"sae(suppress all exceptions) is invalid",
261 		"er(embedded rounding) is invalid",
262 		"invalid broadcast",
263 		"invalid opmask with memory",
264 		"invalid zero",
265 		"invalid rip in AutoGrow",
266 		"invalid mib address",
267 		"x2APIC is not supported",
268 		"not supported",
269 		"same regs are invalid",
270 		"internal error"
271 	};
272 	assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
273 	return err <= ERR_INTERNAL ? errTbl[err] : "unknown err";
274 }
275 
276 #ifdef XBYAK_NO_EXCEPTION
277 namespace local {
278 
GetErrorRef()279 inline int& GetErrorRef() {
280 	static XBYAK_TLS int err = 0;
281 	return err;
282 }
283 
SetError(int err)284 inline void SetError(int err) {
285 	if (local::GetErrorRef()) return; // keep the first err code
286 	local::GetErrorRef() = err;
287 }
288 
289 } // local
290 
ClearError()291 inline void ClearError() {
292 	local::GetErrorRef() = 0;
293 }
GetError()294 inline int GetError() { return local::GetErrorRef(); }
295 
296 #define XBYAK_THROW(err) { local::SetError(err); return; }
297 #define XBYAK_THROW_RET(err, r) { local::SetError(err); return r; }
298 
299 #else
300 class Error : public std::exception {
301 	int err_;
302 public:
Error(int err)303 	explicit Error(int err) : err_(err)
304 	{
305 		if (err_ < 0 || err_ > ERR_INTERNAL) {
306 			err_ = ERR_INTERNAL;
307 		}
308 	}
309 	operator int() const { return err_; }
what()310 	const char *what() const XBYAK_NOEXCEPT
311 	{
312 		return ConvertErrorToString(err_);
313 	}
314 };
315 
316 // dummy functions
ClearError()317 inline void ClearError() { }
GetError()318 inline int GetError() { return 0; }
319 
ConvertErrorToString(const Error & err)320 inline const char *ConvertErrorToString(const Error& err)
321 {
322 	return err.what();
323 }
324 
325 #define XBYAK_THROW(err) { throw Error(err); }
326 #define XBYAK_THROW_RET(err, r) { throw Error(err); }
327 
328 #endif
329 
AlignedMalloc(size_t size,size_t alignment)330 inline void *AlignedMalloc(size_t size, size_t alignment)
331 {
332 #ifdef __MINGW32__
333 	return __mingw_aligned_malloc(size, alignment);
334 #elif defined(_WIN32)
335 	return _aligned_malloc(size, alignment);
336 #else
337 	void *p;
338 	int ret = posix_memalign(&p, alignment, size);
339 	return (ret == 0) ? p : 0;
340 #endif
341 }
342 
AlignedFree(void * p)343 inline void AlignedFree(void *p)
344 {
345 #ifdef __MINGW32__
346 	__mingw_aligned_free(p);
347 #elif defined(_MSC_VER)
348 	_aligned_free(p);
349 #else
350 	free(p);
351 #endif
352 }
353 
354 template<class To, class From>
CastTo(From p)355 inline const To CastTo(From p) XBYAK_NOEXCEPT
356 {
357 	return (const To)(size_t)(p);
358 }
359 namespace inner {
360 
361 static const size_t ALIGN_PAGE_SIZE = 4096;
362 
IsInDisp8(uint32_t x)363 inline bool IsInDisp8(uint32_t x) { return 0xFFFFFF80 <= x || x <= 0x7F; }
IsInInt32(uint64_t x)364 inline bool IsInInt32(uint64_t x) { return ~uint64_t(0x7fffffffu) <= x || x <= 0x7FFFFFFFU; }
365 
VerifyInInt32(uint64_t x)366 inline uint32_t VerifyInInt32(uint64_t x)
367 {
368 #ifdef XBYAK64
369 	if (!IsInInt32(x)) XBYAK_THROW_RET(ERR_OFFSET_IS_TOO_BIG, 0)
370 #endif
371 	return static_cast<uint32_t>(x);
372 }
373 
374 enum LabelMode {
375 	LasIs, // as is
376 	Labs, // absolute
377 	LaddTop // (addr + top) for mov(reg, label) with AutoGrow
378 };
379 
380 } // inner
381 
382 /*
383 	custom allocator
384 */
385 struct Allocator {
allocAllocator386 	virtual uint8_t *alloc(size_t size) { return reinterpret_cast<uint8_t*>(AlignedMalloc(size, inner::ALIGN_PAGE_SIZE)); }
freeAllocator387 	virtual void free(uint8_t *p) { AlignedFree(p); }
~AllocatorAllocator388 	virtual ~Allocator() {}
389 	/* override to return false if you call protect() manually */
useProtectAllocator390 	virtual bool useProtect() const { return true; }
391 };
392 
393 #ifdef XBYAK_USE_MMAP_ALLOCATOR
394 #ifdef XBYAK_USE_MAP_JIT
395 namespace util {
396 
getMacOsVersionPure()397 inline int getMacOsVersionPure()
398 {
399 	char buf[64];
400 	size_t size = sizeof(buf);
401 	int err = sysctlbyname("kern.osrelease", buf, &size, NULL, 0);
402 	if (err != 0) return 0;
403 	char *endp;
404 	int major = strtol(buf, &endp, 10);
405 	if (*endp != '.') return 0;
406 	return major;
407 }
408 
getMacOsVersion()409 inline int getMacOsVersion()
410 {
411 	static const int version = getMacOsVersionPure();
412 	return version;
413 }
414 
415 } // util
416 #endif
417 class MmapAllocator : Allocator {
418 	typedef XBYAK_STD_UNORDERED_MAP<uintptr_t, size_t> SizeList;
419 	SizeList sizeList_;
420 public:
alloc(size_t size)421 	uint8_t *alloc(size_t size)
422 	{
423 		const size_t alignedSizeM1 = inner::ALIGN_PAGE_SIZE - 1;
424 		size = (size + alignedSizeM1) & ~alignedSizeM1;
425 #if defined(MAP_ANONYMOUS)
426 		int mode = MAP_PRIVATE | MAP_ANONYMOUS;
427 #elif defined(MAP_ANON)
428 		int mode = MAP_PRIVATE | MAP_ANON;
429 #else
430 		#error "not supported"
431 #endif
432 #if defined(XBYAK_USE_MAP_JIT)
433 		const int mojaveVersion = 18;
434 		if (util::getMacOsVersion() >= mojaveVersion) mode |= MAP_JIT;
435 #endif
436 		int fd = -1;
437 #if defined(XBYAK_USE_MEMFD)
438 		fd = memfd_create("xbyak", MFD_CLOEXEC);
439 		if (fd != -1) {
440 			mode = MAP_SHARED;
441 			if (ftruncate(fd, size) != 0) XBYAK_THROW_RET(ERR_CANT_ALLOC, 0)
442 		}
443 #endif
444 		void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, mode, fd, 0);
445 #if defined(XBYAK_USE_MEMFD)
446 		if (fd != -1) close(fd);
447 #endif
448 		if (p == MAP_FAILED) XBYAK_THROW_RET(ERR_CANT_ALLOC, 0)
449 		assert(p);
450 		sizeList_[(uintptr_t)p] = size;
451 		return (uint8_t*)p;
452 	}
free(uint8_t * p)453 	void free(uint8_t *p)
454 	{
455 		if (p == 0) return;
456 		SizeList::iterator i = sizeList_.find((uintptr_t)p);
457 		if (i == sizeList_.end()) XBYAK_THROW(ERR_BAD_PARAMETER)
458 		if (munmap((void*)i->first, i->second) < 0) XBYAK_THROW(ERR_MUNMAP)
459 		sizeList_.erase(i);
460 	}
461 };
462 #endif
463 
464 class Address;
465 class Reg;
466 
467 class Operand {
468 	static const uint8_t EXT8BIT = 0x20;
469 	unsigned int idx_:6; // 0..31 + EXT8BIT = 1 if spl/bpl/sil/dil
470 	unsigned int kind_:10;
471 	unsigned int bit_:14;
472 protected:
473 	unsigned int zero_:1;
474 	unsigned int mask_:3;
475 	unsigned int rounding_:3;
setIdx(int idx)476 	void setIdx(int idx) { idx_ = idx; }
477 public:
478 	enum Kind {
479 		NONE = 0,
480 		MEM = 1 << 0,
481 		REG = 1 << 1,
482 		MMX = 1 << 2,
483 		FPU = 1 << 3,
484 		XMM = 1 << 4,
485 		YMM = 1 << 5,
486 		ZMM = 1 << 6,
487 		OPMASK = 1 << 7,
488 		BNDREG = 1 << 8,
489 		TMM = 1 << 9
490 	};
491 	enum Code {
492 #ifdef XBYAK64
493 		RAX = 0, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15,
494 		R8D = 8, R9D, R10D, R11D, R12D, R13D, R14D, R15D,
495 		R8W = 8, R9W, R10W, R11W, R12W, R13W, R14W, R15W,
496 		R8B = 8, R9B, R10B, R11B, R12B, R13B, R14B, R15B,
497 		SPL = 4, BPL, SIL, DIL,
498 #endif
499 		EAX = 0, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
500 		AX = 0, CX, DX, BX, SP, BP, SI, DI,
501 		AL = 0, CL, DL, BL, AH, CH, DH, BH
502 	};
Operand()503 	XBYAK_CONSTEXPR Operand() : idx_(0), kind_(0), bit_(0), zero_(0), mask_(0), rounding_(0) { }
504 	XBYAK_CONSTEXPR Operand(int idx, Kind kind, int bit, bool ext8bit = 0)
505 		: idx_(static_cast<uint8_t>(idx | (ext8bit ? EXT8BIT : 0)))
506 		, kind_(kind)
507 		, bit_(bit)
508 		, zero_(0), mask_(0), rounding_(0)
509 	{
510 		assert((bit_ & (bit_ - 1)) == 0); // bit must be power of two
511 	}
getKind()512 	XBYAK_CONSTEXPR Kind getKind() const { return static_cast<Kind>(kind_); }
getIdx()513 	XBYAK_CONSTEXPR int getIdx() const { return idx_ & (EXT8BIT - 1); }
isNone()514 	XBYAK_CONSTEXPR bool isNone() const { return kind_ == 0; }
isMMX()515 	XBYAK_CONSTEXPR bool isMMX() const { return is(MMX); }
isXMM()516 	XBYAK_CONSTEXPR bool isXMM() const { return is(XMM); }
isYMM()517 	XBYAK_CONSTEXPR bool isYMM() const { return is(YMM); }
isZMM()518 	XBYAK_CONSTEXPR bool isZMM() const { return is(ZMM); }
isTMM()519 	XBYAK_CONSTEXPR bool isTMM() const { return is(TMM); }
isXMEM()520 	XBYAK_CONSTEXPR bool isXMEM() const { return is(XMM | MEM); }
isYMEM()521 	XBYAK_CONSTEXPR bool isYMEM() const { return is(YMM | MEM); }
isZMEM()522 	XBYAK_CONSTEXPR bool isZMEM() const { return is(ZMM | MEM); }
isOPMASK()523 	XBYAK_CONSTEXPR bool isOPMASK() const { return is(OPMASK); }
isBNDREG()524 	XBYAK_CONSTEXPR bool isBNDREG() const { return is(BNDREG); }
525 	XBYAK_CONSTEXPR bool isREG(int bit = 0) const { return is(REG, bit); }
526 	XBYAK_CONSTEXPR bool isMEM(int bit = 0) const { return is(MEM, bit); }
isFPU()527 	XBYAK_CONSTEXPR bool isFPU() const { return is(FPU); }
isExt8bit()528 	XBYAK_CONSTEXPR bool isExt8bit() const { return (idx_ & EXT8BIT) != 0; }
isExtIdx()529 	XBYAK_CONSTEXPR bool isExtIdx() const { return (getIdx() & 8) != 0; }
isExtIdx2()530 	XBYAK_CONSTEXPR bool isExtIdx2() const { return (getIdx() & 16) != 0; }
hasEvex()531 	XBYAK_CONSTEXPR bool hasEvex() const { return isZMM() || isExtIdx2() || getOpmaskIdx() || getRounding(); }
hasRex()532 	XBYAK_CONSTEXPR bool hasRex() const { return isExt8bit() || isREG(64) || isExtIdx(); }
hasZero()533 	XBYAK_CONSTEXPR bool hasZero() const { return zero_; }
getOpmaskIdx()534 	XBYAK_CONSTEXPR int getOpmaskIdx() const { return mask_; }
getRounding()535 	XBYAK_CONSTEXPR int getRounding() const { return rounding_; }
setKind(Kind kind)536 	void setKind(Kind kind)
537 	{
538 		if ((kind & (XMM|YMM|ZMM|TMM)) == 0) return;
539 		kind_ = kind;
540 		bit_ = kind == XMM ? 128 : kind == YMM ? 256 : kind == ZMM ? 512 : 8192;
541 	}
542 	// err if MMX/FPU/OPMASK/BNDREG
543 	void setBit(int bit);
544 	void setOpmaskIdx(int idx, bool /*ignore_idx0*/ = true)
545 	{
546 		if (mask_) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET)
547 		mask_ = idx;
548 	}
setRounding(int idx)549 	void setRounding(int idx)
550 	{
551 		if (rounding_) XBYAK_THROW(ERR_ROUNDING_IS_ALREADY_SET)
552 		rounding_ = idx;
553 	}
setZero()554 	void setZero() { zero_ = true; }
555 	// ah, ch, dh, bh?
isHigh8bit()556 	bool isHigh8bit() const
557 	{
558 		if (!isBit(8)) return false;
559 		if (isExt8bit()) return false;
560 		const int idx = getIdx();
561 		return AH <= idx && idx <= BH;
562 	}
563 	// any bit is accetable if bit == 0
564 	XBYAK_CONSTEXPR bool is(int kind, uint32_t bit = 0) const
565 	{
566 		return (kind == 0 || (kind_ & kind)) && (bit == 0 || (bit_ & bit)); // cf. you can set (8|16)
567 	}
isBit(uint32_t bit)568 	XBYAK_CONSTEXPR bool isBit(uint32_t bit) const { return (bit_ & bit) != 0; }
getBit()569 	XBYAK_CONSTEXPR uint32_t getBit() const { return bit_; }
toString()570 	const char *toString() const
571 	{
572 		const int idx = getIdx();
573 		if (kind_ == REG) {
574 			if (isExt8bit()) {
575 				static const char *tbl[4] = { "spl", "bpl", "sil", "dil" };
576 				return tbl[idx - 4];
577 			}
578 			static const char *tbl[4][16] = {
579 				{ "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh", "r8b", "r9b", "r10b",  "r11b", "r12b", "r13b", "r14b", "r15b" },
580 				{ "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w",  "r11w", "r12w", "r13w", "r14w", "r15w" },
581 				{ "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8d", "r9d", "r10d",  "r11d", "r12d", "r13d", "r14d", "r15d" },
582 				{ "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10",  "r11", "r12", "r13", "r14", "r15" },
583 			};
584 			return tbl[bit_ == 8 ? 0 : bit_ == 16 ? 1 : bit_ == 32 ? 2 : 3][idx];
585 		} else if (isOPMASK()) {
586 			static const char *tbl[8] = { "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7" };
587 			return tbl[idx];
588 		} else if (isTMM()) {
589 			static const char *tbl[8] = {
590 				"tmm0", "tmm1", "tmm2", "tmm3", "tmm4", "tmm5", "tmm6", "tmm7"
591 			};
592 			return tbl[idx];
593 		} else if (isZMM()) {
594 			static const char *tbl[32] = {
595 				"zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15",
596 				"zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31"
597 			};
598 			return tbl[idx];
599 		} else if (isYMM()) {
600 			static const char *tbl[32] = {
601 				"ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15",
602 				"ymm16", "ymm17", "ymm18", "ymm19", "ymm20", "ymm21", "ymm22", "ymm23", "ymm24", "ymm25", "ymm26", "ymm27", "ymm28", "ymm29", "ymm30", "ymm31"
603 			};
604 			return tbl[idx];
605 		} else if (isXMM()) {
606 			static const char *tbl[32] = {
607 				"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
608 				"xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23", "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31"
609 			};
610 			return tbl[idx];
611 		} else if (isMMX()) {
612 			static const char *tbl[8] = { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" };
613 			return tbl[idx];
614 		} else if (isFPU()) {
615 			static const char *tbl[8] = { "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7" };
616 			return tbl[idx];
617 		} else if (isBNDREG()) {
618 			static const char *tbl[4] = { "bnd0", "bnd1", "bnd2", "bnd3" };
619 			return tbl[idx];
620 		}
621 		XBYAK_THROW_RET(ERR_INTERNAL, 0);
622 	}
isEqualIfNotInherited(const Operand & rhs)623 	bool isEqualIfNotInherited(const Operand& rhs) const { return idx_ == rhs.idx_ && kind_ == rhs.kind_ && bit_ == rhs.bit_ && zero_ == rhs.zero_ && mask_ == rhs.mask_ && rounding_ == rhs.rounding_; }
624 	bool operator==(const Operand& rhs) const;
625 	bool operator!=(const Operand& rhs) const { return !operator==(rhs); }
626 	const Address& getAddress() const;
627 	const Reg& getReg() const;
628 };
629 
setBit(int bit)630 inline void Operand::setBit(int bit)
631 {
632 	if (bit != 8 && bit != 16 && bit != 32 && bit != 64 && bit != 128 && bit != 256 && bit != 512 && bit != 8192) goto ERR;
633 	if (isBit(bit)) return;
634 	if (is(MEM | OPMASK)) {
635 		bit_ = bit;
636 		return;
637 	}
638 	if (is(REG | XMM | YMM | ZMM | TMM)) {
639 		int idx = getIdx();
640 		// err if converting ah, bh, ch, dh
641 		if (isREG(8) && (4 <= idx && idx < 8) && !isExt8bit()) goto ERR;
642 		Kind kind = REG;
643 		switch (bit) {
644 		case 8:
645 			if (idx >= 16) goto ERR;
646 #ifdef XBYAK32
647 			if (idx >= 4) goto ERR;
648 #else
649 			if (4 <= idx && idx < 8) idx |= EXT8BIT;
650 #endif
651 			break;
652 		case 16:
653 		case 32:
654 		case 64:
655 			if (idx >= 16) goto ERR;
656 			break;
657 		case 128: kind = XMM; break;
658 		case 256: kind = YMM; break;
659 		case 512: kind = ZMM; break;
660 		case 8192: kind = TMM; break;
661 		}
662 		idx_ = idx;
663 		kind_ = kind;
664 		bit_ = bit;
665 		if (bit >= 128) return; // keep mask_ and rounding_
666 		mask_ = 0;
667 		rounding_ = 0;
668 		return;
669 	}
670 ERR:
671 	XBYAK_THROW(ERR_CANT_CONVERT)
672 }
673 
674 class Label;
675 
676 struct Reg8;
677 struct Reg16;
678 struct Reg32;
679 #ifdef XBYAK64
680 struct Reg64;
681 #endif
682 class Reg : public Operand {
683 public:
Reg()684 	XBYAK_CONSTEXPR Reg() { }
Operand(idx,kind,bit,ext8bit)685 	XBYAK_CONSTEXPR Reg(int idx, Kind kind, int bit = 0, bool ext8bit = false) : Operand(idx, kind, bit, ext8bit) { }
686 	// convert to Reg8/Reg16/Reg32/Reg64/XMM/YMM/ZMM
changeBit(int bit)687 	Reg changeBit(int bit) const { Reg r(*this); r.setBit(bit); return r; }
getRexW()688 	uint8_t getRexW() const { return isREG(64) ? 8 : 0; }
getRexR()689 	uint8_t getRexR() const { return isExtIdx() ? 4 : 0; }
getRexX()690 	uint8_t getRexX() const { return isExtIdx() ? 2 : 0; }
getRexB()691 	uint8_t getRexB() const { return isExtIdx() ? 1 : 0; }
692 	uint8_t getRex(const Reg& base = Reg()) const
693 	{
694 		uint8_t rex = getRexW() | getRexR() | base.getRexW() | base.getRexB();
695 		if (rex || isExt8bit() || base.isExt8bit()) rex |= 0x40;
696 		return rex;
697 	}
698 	Reg8 cvt8() const;
699 	Reg16 cvt16() const;
700 	Reg32 cvt32() const;
701 #ifdef XBYAK64
702 	Reg64 cvt64() const;
703 #endif
704 };
705 
getReg()706 inline const Reg& Operand::getReg() const
707 {
708 	assert(!isMEM());
709 	return static_cast<const Reg&>(*this);
710 }
711 
712 struct Reg8 : public Reg {
713 	explicit XBYAK_CONSTEXPR Reg8(int idx = 0, bool ext8bit = false) : Reg(idx, Operand::REG, 8, ext8bit) { }
714 };
715 
716 struct Reg16 : public Reg {
717 	explicit XBYAK_CONSTEXPR Reg16(int idx = 0) : Reg(idx, Operand::REG, 16) { }
718 };
719 
720 struct Mmx : public Reg {
RegMmx721 	explicit XBYAK_CONSTEXPR Mmx(int idx = 0, Kind kind = Operand::MMX, int bit = 64) : Reg(idx, kind, bit) { }
722 };
723 
724 struct EvexModifierRounding {
725 	enum {
726 		T_RN_SAE = 1,
727 		T_RD_SAE = 2,
728 		T_RU_SAE = 3,
729 		T_RZ_SAE = 4,
730 		T_SAE = 5
731 	};
EvexModifierRoundingEvexModifierRounding732 	explicit XBYAK_CONSTEXPR EvexModifierRounding(int rounding) : rounding(rounding) {}
733 	int rounding;
734 };
EvexModifierZeroEvexModifierZero735 struct EvexModifierZero{ XBYAK_CONSTEXPR EvexModifierZero() {}};
736 
737 struct Xmm : public Mmx {
MmxXmm738 	explicit XBYAK_CONSTEXPR Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { }
XmmXmm739 	XBYAK_CONSTEXPR Xmm(Kind kind, int idx) : Mmx(idx, kind, kind == XMM ? 128 : kind == YMM ? 256 : 512) { }
740 	Xmm operator|(const EvexModifierRounding& emr) const { Xmm r(*this); r.setRounding(emr.rounding); return r; }
copyAndSetIdxXmm741 	Xmm copyAndSetIdx(int idx) const { Xmm ret(*this); ret.setIdx(idx); return ret; }
copyAndSetKindXmm742 	Xmm copyAndSetKind(Operand::Kind kind) const { Xmm ret(*this); ret.setKind(kind); return ret; }
743 };
744 
745 struct Ymm : public Xmm {
XmmYmm746 	explicit XBYAK_CONSTEXPR Ymm(int idx = 0, Kind kind = Operand::YMM, int bit = 256) : Xmm(idx, kind, bit) { }
747 	Ymm operator|(const EvexModifierRounding& emr) const { Ymm r(*this); r.setRounding(emr.rounding); return r; }
748 };
749 
750 struct Zmm : public Ymm {
751 	explicit XBYAK_CONSTEXPR Zmm(int idx = 0) : Ymm(idx, Operand::ZMM, 512) { }
752 	Zmm operator|(const EvexModifierRounding& emr) const { Zmm r(*this); r.setRounding(emr.rounding); return r; }
753 };
754 
755 #ifdef XBYAK64
756 struct Tmm : public Reg {
RegTmm757 	explicit XBYAK_CONSTEXPR Tmm(int idx = 0, Kind kind = Operand::TMM, int bit = 8192) : Reg(idx, kind, bit) { }
758 };
759 #endif
760 
761 struct Opmask : public Reg {
762 	explicit XBYAK_CONSTEXPR Opmask(int idx = 0) : Reg(idx, Operand::OPMASK, 64) {}
763 };
764 
765 struct BoundsReg : public Reg {
766 	explicit XBYAK_CONSTEXPR BoundsReg(int idx = 0) : Reg(idx, Operand::BNDREG, 128) {}
767 };
768 
769 template<class T>T operator|(const T& x, const Opmask& k) { T r(x); r.setOpmaskIdx(k.getIdx()); return r; }
770 template<class T>T operator|(const T& x, const EvexModifierZero&) { T r(x); r.setZero(); return r; }
771 template<class T>T operator|(const T& x, const EvexModifierRounding& emr) { T r(x); r.setRounding(emr.rounding); return r; }
772 
773 struct Fpu : public Reg {
774 	explicit XBYAK_CONSTEXPR Fpu(int idx = 0) : Reg(idx, Operand::FPU, 32) { }
775 };
776 
777 struct Reg32e : public Reg {
Reg32eReg32e778 	explicit XBYAK_CONSTEXPR Reg32e(int idx, int bit) : Reg(idx, Operand::REG, bit) {}
779 };
780 struct Reg32 : public Reg32e {
781 	explicit XBYAK_CONSTEXPR Reg32(int idx = 0) : Reg32e(idx, 32) {}
782 };
783 #ifdef XBYAK64
784 struct Reg64 : public Reg32e {
785 	explicit XBYAK_CONSTEXPR Reg64(int idx = 0) : Reg32e(idx, 64) {}
786 };
787 struct RegRip {
788 	int64_t disp_;
789 	const Label* label_;
790 	bool isAddr_;
disp_RegRip791 	explicit XBYAK_CONSTEXPR RegRip(int64_t disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
792 	friend const RegRip operator+(const RegRip& r, int disp) {
793 		return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
794 	}
795 	friend const RegRip operator-(const RegRip& r, int disp) {
796 		return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
797 	}
798 	friend const RegRip operator+(const RegRip& r, int64_t disp) {
799 		return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
800 	}
801 	friend const RegRip operator-(const RegRip& r, int64_t disp) {
802 		return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
803 	}
804 	friend const RegRip operator+(const RegRip& r, const Label& label) {
805 		if (r.label_ || r.isAddr_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegRip());
806 		return RegRip(r.disp_, &label);
807 	}
808 	friend const RegRip operator+(const RegRip& r, const void *addr) {
809 		if (r.label_ || r.isAddr_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegRip());
810 		return RegRip(r.disp_ + (int64_t)addr, 0, true);
811 	}
812 };
813 #endif
814 
cvt8()815 inline Reg8 Reg::cvt8() const
816 {
817 	Reg r = changeBit(8); return Reg8(r.getIdx(), r.isExt8bit());
818 }
819 
cvt16()820 inline Reg16 Reg::cvt16() const
821 {
822 	return Reg16(changeBit(16).getIdx());
823 }
824 
cvt32()825 inline Reg32 Reg::cvt32() const
826 {
827 	return Reg32(changeBit(32).getIdx());
828 }
829 
830 #ifdef XBYAK64
cvt64()831 inline Reg64 Reg::cvt64() const
832 {
833 	return Reg64(changeBit(64).getIdx());
834 }
835 #endif
836 
837 #ifndef XBYAK_DISABLE_SEGMENT
838 // not derived from Reg
839 class Segment {
840 	int idx_;
841 public:
842 	enum {
843 		es, cs, ss, ds, fs, gs
844 	};
Segment(int idx)845 	explicit XBYAK_CONSTEXPR Segment(int idx) : idx_(idx) { assert(0 <= idx_ && idx_ < 6); }
getIdx()846 	int getIdx() const { return idx_; }
toString()847 	const char *toString() const
848 	{
849 		static const char tbl[][3] = {
850 			"es", "cs", "ss", "ds", "fs", "gs"
851 		};
852 		return tbl[idx_];
853 	}
854 };
855 #endif
856 
857 class RegExp {
858 public:
859 #ifdef XBYAK64
860 	enum { i32e = 32 | 64 };
861 #else
862 	enum { i32e = 32 };
863 #endif
864 	XBYAK_CONSTEXPR RegExp(size_t disp = 0) : scale_(0), disp_(disp) { }
865 	XBYAK_CONSTEXPR RegExp(const Reg& r, int scale = 1)
scale_(scale)866 		: scale_(scale)
867 		, disp_(0)
868 	{
869 		if (!r.isREG(i32e) && !r.is(Reg::XMM|Reg::YMM|Reg::ZMM|Reg::TMM)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
870 		if (scale == 0) return;
871 		if (scale != 1 && scale != 2 && scale != 4 && scale != 8) XBYAK_THROW(ERR_BAD_SCALE)
872 		if (r.getBit() >= 128 || scale != 1) { // xmm/ymm is always index
873 			index_ = r;
874 		} else {
875 			base_ = r;
876 		}
877 	}
878 	bool isVsib(int bit = 128 | 256 | 512) const { return index_.isBit(bit); }
optimize()879 	RegExp optimize() const
880 	{
881 		RegExp exp = *this;
882 		// [reg * 2] => [reg + reg]
883 		if (index_.isBit(i32e) && !base_.getBit() && scale_ == 2) {
884 			exp.base_ = index_;
885 			exp.scale_ = 1;
886 		}
887 		return exp;
888 	}
889 	bool operator==(const RegExp& rhs) const
890 	{
891 		return base_ == rhs.base_ && index_ == rhs.index_ && disp_ == rhs.disp_ && scale_ == rhs.scale_;
892 	}
getBase()893 	const Reg& getBase() const { return base_; }
getIndex()894 	const Reg& getIndex() const { return index_; }
getScale()895 	int getScale() const { return scale_; }
getDisp()896 	size_t getDisp() const { return disp_; }
verify()897 	XBYAK_CONSTEXPR void verify() const
898 	{
899 		if (base_.getBit() >= 128) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
900 		if (index_.getBit() && index_.getBit() <= 64) {
901 			if (index_.getIdx() == Operand::ESP) XBYAK_THROW(ERR_ESP_CANT_BE_INDEX)
902 			if (base_.getBit() && base_.getBit() != index_.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
903 		}
904 	}
905 	friend RegExp operator+(const RegExp& a, const RegExp& b);
906 	friend RegExp operator-(const RegExp& e, size_t disp);
getRex()907 	uint8_t getRex() const
908 	{
909 		uint8_t rex = index_.getRexX() | base_.getRexB();
910 		return rex ? uint8_t(rex | 0x40) : 0;
911 	}
912 private:
913 	/*
914 		[base_ + index_ * scale_ + disp_]
915 		base : Reg32e, index : Reg32e(w/o esp), Xmm, Ymm
916 	*/
917 	Reg base_;
918 	Reg index_;
919 	int scale_;
920 	size_t disp_;
921 };
922 
923 inline RegExp operator+(const RegExp& a, const RegExp& b)
924 {
925 	if (a.index_.getBit() && b.index_.getBit()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp())
926 	RegExp ret = a;
927 	if (!ret.index_.getBit()) { ret.index_ = b.index_; ret.scale_ = b.scale_; }
928 	if (b.base_.getBit()) {
929 		if (ret.base_.getBit()) {
930 			if (ret.index_.getBit()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp())
931 			// base + base => base + index * 1
932 			ret.index_ = b.base_;
933 			// [reg + esp] => [esp + reg]
934 			if (ret.index_.getIdx() == Operand::ESP) std::swap(ret.base_, ret.index_);
935 			ret.scale_ = 1;
936 		} else {
937 			ret.base_ = b.base_;
938 		}
939 	}
940 	ret.disp_ += b.disp_;
941 	return ret;
942 }
943 inline RegExp operator*(const Reg& r, int scale)
944 {
945 	return RegExp(r, scale);
946 }
947 inline RegExp operator*(int scale, const Reg& r)
948 {
949 	return r * scale;
950 }
951 inline RegExp operator-(const RegExp& e, size_t disp)
952 {
953 	RegExp ret = e;
954 	ret.disp_ -= disp;
955 	return ret;
956 }
957 
958 // 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc)
959 void *const AutoGrow = (void*)1; //-V566
960 void *const DontSetProtectRWE = (void*)2; //-V566
961 
962 class CodeArray {
963 	enum Type {
964 		USER_BUF = 1, // use userPtr(non alignment, non protect)
965 		ALLOC_BUF, // use new(alignment, protect)
966 		AUTO_GROW // automatically move and grow memory if necessary
967 	};
968 	CodeArray(const CodeArray& rhs);
969 	void operator=(const CodeArray&);
isAllocType()970 	bool isAllocType() const { return type_ == ALLOC_BUF || type_ == AUTO_GROW; }
971 	struct AddrInfo {
972 		size_t codeOffset; // position to write
973 		size_t jmpAddr; // value to write
974 		int jmpSize; // size of jmpAddr
975 		inner::LabelMode mode;
AddrInfoAddrInfo976 		AddrInfo(size_t _codeOffset, size_t _jmpAddr, int _jmpSize, inner::LabelMode _mode)
977 			: codeOffset(_codeOffset), jmpAddr(_jmpAddr), jmpSize(_jmpSize), mode(_mode) {}
getValAddrInfo978 		uint64_t getVal(const uint8_t *top) const
979 		{
980 			uint64_t disp = (mode == inner::LaddTop) ? jmpAddr + size_t(top) : (mode == inner::LasIs) ? jmpAddr : jmpAddr - size_t(top);
981 			if (jmpSize == 4) disp = inner::VerifyInInt32(disp);
982 			return disp;
983 		}
984 	};
985 	typedef std::list<AddrInfo> AddrInfoList;
986 	AddrInfoList addrInfoList_;
987 	const Type type_;
988 #ifdef XBYAK_USE_MMAP_ALLOCATOR
989 	MmapAllocator defaultAllocator_;
990 #else
991 	Allocator defaultAllocator_;
992 #endif
993 	Allocator *alloc_;
994 protected:
995 	size_t maxSize_;
996 	uint8_t *top_;
997 	size_t size_;
998 	bool isCalledCalcJmpAddress_;
999 
useProtect()1000 	bool useProtect() const { return alloc_->useProtect(); }
1001 	/*
1002 		allocate new memory and copy old data to the new area
1003 	*/
growMemory()1004 	void growMemory()
1005 	{
1006 		const size_t newSize = (std::max<size_t>)(DEFAULT_MAX_CODE_SIZE, maxSize_ * 2);
1007 		uint8_t *newTop = alloc_->alloc(newSize);
1008 		if (newTop == 0) XBYAK_THROW(ERR_CANT_ALLOC)
1009 		for (size_t i = 0; i < size_; i++) newTop[i] = top_[i];
1010 		alloc_->free(top_);
1011 		top_ = newTop;
1012 		maxSize_ = newSize;
1013 	}
1014 	/*
1015 		calc jmp address for AutoGrow mode
1016 	*/
calcJmpAddress()1017 	void calcJmpAddress()
1018 	{
1019 		if (isCalledCalcJmpAddress_) return;
1020 		for (AddrInfoList::const_iterator i = addrInfoList_.begin(), ie = addrInfoList_.end(); i != ie; ++i) {
1021 			uint64_t disp = i->getVal(top_);
1022 			rewrite(i->codeOffset, disp, i->jmpSize);
1023 		}
1024 		isCalledCalcJmpAddress_ = true;
1025 	}
1026 public:
1027 	enum ProtectMode {
1028 		PROTECT_RW = 0, // read/write
1029 		PROTECT_RWE = 1, // read/write/exec
1030 		PROTECT_RE = 2 // read/exec
1031 	};
1032 	explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0)
1033 		: type_(userPtr == AutoGrow ? AUTO_GROW : (userPtr == 0 || userPtr == DontSetProtectRWE) ? ALLOC_BUF : USER_BUF)
1034 		, alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_)
1035 		, maxSize_(maxSize)
1036 		, top_(type_ == USER_BUF ? reinterpret_cast<uint8_t*>(userPtr) : alloc_->alloc((std::max<size_t>)(maxSize, 1)))
1037 		, size_(0)
1038 		, isCalledCalcJmpAddress_(false)
1039 	{
1040 		if (maxSize_ > 0 && top_ == 0) XBYAK_THROW(ERR_CANT_ALLOC)
1041 		if ((type_ == ALLOC_BUF && userPtr != DontSetProtectRWE && useProtect()) && !setProtectMode(PROTECT_RWE, false)) {
1042 			alloc_->free(top_);
1043 			XBYAK_THROW(ERR_CANT_PROTECT)
1044 		}
1045 	}
~CodeArray()1046 	virtual ~CodeArray()
1047 	{
1048 		if (isAllocType()) {
1049 			if (useProtect()) setProtectModeRW(false);
1050 			alloc_->free(top_);
1051 		}
1052 	}
1053 	bool setProtectMode(ProtectMode mode, bool throwException = true)
1054 	{
1055 		bool isOK = protect(top_, maxSize_, mode);
1056 		if (isOK) return true;
1057 		if (throwException) XBYAK_THROW_RET(ERR_CANT_PROTECT, false)
1058 		return false;
1059 	}
1060 	bool setProtectModeRE(bool throwException = true) { return setProtectMode(PROTECT_RE, throwException); }
1061 	bool setProtectModeRW(bool throwException = true) { return setProtectMode(PROTECT_RW, throwException); }
resetSize()1062 	void resetSize()
1063 	{
1064 		size_ = 0;
1065 		addrInfoList_.clear();
1066 		isCalledCalcJmpAddress_ = false;
1067 	}
db(int code)1068 	void db(int code)
1069 	{
1070 		if (size_ >= maxSize_) {
1071 			if (type_ == AUTO_GROW) {
1072 				growMemory();
1073 			} else {
1074 				XBYAK_THROW(ERR_CODE_IS_TOO_BIG)
1075 			}
1076 		}
1077 		top_[size_++] = static_cast<uint8_t>(code);
1078 	}
db(const uint8_t * code,size_t codeSize)1079 	void db(const uint8_t *code, size_t codeSize)
1080 	{
1081 		for (size_t i = 0; i < codeSize; i++) db(code[i]);
1082 	}
db(uint64_t code,size_t codeSize)1083 	void db(uint64_t code, size_t codeSize)
1084 	{
1085 		if (codeSize > 8) XBYAK_THROW(ERR_BAD_PARAMETER)
1086 		for (size_t i = 0; i < codeSize; i++) db(static_cast<uint8_t>(code >> (i * 8)));
1087 	}
dw(uint32_t code)1088 	void dw(uint32_t code) { db(code, 2); }
dd(uint32_t code)1089 	void dd(uint32_t code) { db(code, 4); }
dq(uint64_t code)1090 	void dq(uint64_t code) { db(code, 8); }
getCode()1091 	const uint8_t *getCode() const { return top_; }
1092 	template<class F>
getCode()1093 	const F getCode() const { return reinterpret_cast<F>(top_); }
getCurr()1094 	const uint8_t *getCurr() const { return &top_[size_]; }
1095 	template<class F>
getCurr()1096 	const F getCurr() const { return reinterpret_cast<F>(&top_[size_]); }
getSize()1097 	size_t getSize() const { return size_; }
setSize(size_t size)1098 	void setSize(size_t size)
1099 	{
1100 		if (size > maxSize_) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG)
1101 		size_ = size;
1102 	}
dump()1103 	void dump() const
1104 	{
1105 		const uint8_t *p = getCode();
1106 		size_t bufSize = getSize();
1107 		size_t remain = bufSize;
1108 		for (int i = 0; i < 4; i++) {
1109 			size_t disp = 16;
1110 			if (remain < 16) {
1111 				disp = remain;
1112 			}
1113 			for (size_t j = 0; j < 16; j++) {
1114 				if (j < disp) {
1115 					printf("%02X", p[i * 16 + j]);
1116 				}
1117 			}
1118 			putchar('\n');
1119 			remain -= disp;
1120 			if (remain == 0) {
1121 				break;
1122 			}
1123 		}
1124 	}
1125 	/*
1126 		@param offset [in] offset from top
1127 		@param disp [in] offset from the next of jmp
1128 		@param size [in] write size(1, 2, 4, 8)
1129 	*/
rewrite(size_t offset,uint64_t disp,size_t size)1130 	void rewrite(size_t offset, uint64_t disp, size_t size)
1131 	{
1132 		assert(offset < maxSize_);
1133 		if (size != 1 && size != 2 && size != 4 && size != 8) XBYAK_THROW(ERR_BAD_PARAMETER)
1134 		uint8_t *const data = top_ + offset;
1135 		for (size_t i = 0; i < size; i++) {
1136 			data[i] = static_cast<uint8_t>(disp >> (i * 8));
1137 		}
1138 	}
save(size_t offset,size_t val,int size,inner::LabelMode mode)1139 	void save(size_t offset, size_t val, int size, inner::LabelMode mode)
1140 	{
1141 		addrInfoList_.push_back(AddrInfo(offset, val, size, mode));
1142 	}
isAutoGrow()1143 	bool isAutoGrow() const { return type_ == AUTO_GROW; }
isCalledCalcJmpAddress()1144 	bool isCalledCalcJmpAddress() const { return isCalledCalcJmpAddress_; }
1145 	/**
1146 		change exec permission of memory
1147 		@param addr [in] buffer address
1148 		@param size [in] buffer size
1149 		@param protectMode [in] mode(RW/RWE/RE)
1150 		@return true(success), false(failure)
1151 	*/
protect(const void * addr,size_t size,int protectMode)1152 	static inline bool protect(const void *addr, size_t size, int protectMode)
1153 	{
1154 #if defined(_WIN32)
1155 		const DWORD c_rw = PAGE_READWRITE;
1156 		const DWORD c_rwe = PAGE_EXECUTE_READWRITE;
1157 		const DWORD c_re = PAGE_EXECUTE_READ;
1158 		DWORD mode;
1159 #else
1160 		const int c_rw = PROT_READ | PROT_WRITE;
1161 		const int c_rwe = PROT_READ | PROT_WRITE | PROT_EXEC;
1162 		const int c_re = PROT_READ | PROT_EXEC;
1163 		int mode;
1164 #endif
1165 		switch (protectMode) {
1166 		case PROTECT_RW: mode = c_rw; break;
1167 		case PROTECT_RWE: mode = c_rwe; break;
1168 		case PROTECT_RE: mode = c_re; break;
1169 		default:
1170 			return false;
1171 		}
1172 #if defined(_WIN32)
1173 		DWORD oldProtect;
1174 		return VirtualProtect(const_cast<void*>(addr), size, mode, &oldProtect) != 0;
1175 #elif defined(__GNUC__)
1176 		size_t pageSize = sysconf(_SC_PAGESIZE);
1177 		size_t iaddr = reinterpret_cast<size_t>(addr);
1178 		size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
1179 #ifndef NDEBUG
1180 		if (pageSize != 4096) fprintf(stderr, "large page(%zd) is used. not tested enough.\n", pageSize);
1181 #endif
1182 		return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
1183 #else
1184 		return true;
1185 #endif
1186 	}
1187 	/**
1188 		get aligned memory pointer
1189 		@param addr [in] address
1190 		@param alignedSize [in] power of two
1191 		@return aligned addr by alingedSize
1192 	*/
1193 	static inline uint8_t *getAlignedAddress(uint8_t *addr, size_t alignedSize = 16)
1194 	{
1195 		return reinterpret_cast<uint8_t*>((reinterpret_cast<size_t>(addr) + alignedSize - 1) & ~(alignedSize - static_cast<size_t>(1)));
1196 	}
1197 };
1198 
1199 class Address : public Operand {
1200 public:
1201 	enum Mode {
1202 		M_ModRM,
1203 		M_64bitDisp,
1204 		M_rip,
1205 		M_ripAddr
1206 	};
Address(uint32_t sizeBit,bool broadcast,const RegExp & e)1207 	XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegExp& e)
1208 		: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), broadcast_(broadcast)
1209 	{
1210 		e_.verify();
1211 	}
1212 #ifdef XBYAK64
Address(size_t disp)1213 	explicit XBYAK_CONSTEXPR Address(size_t disp)
1214 		: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), broadcast_(false){ }
Address(uint32_t sizeBit,bool broadcast,const RegRip & addr)1215 	XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegRip& addr)
1216 		: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), broadcast_(broadcast) { }
1217 #endif
1218 	RegExp getRegExp(bool optimize = true) const
1219 	{
1220 		return optimize ? e_.optimize() : e_;
1221 	}
getMode()1222 	Mode getMode() const { return mode_; }
is32bit()1223 	bool is32bit() const { return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; }
isOnlyDisp()1224 	bool isOnlyDisp() const { return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax
getDisp()1225 	size_t getDisp() const { return e_.getDisp(); }
getRex()1226 	uint8_t getRex() const
1227 	{
1228 		if (mode_ != M_ModRM) return 0;
1229 		return getRegExp().getRex();
1230 	}
is64bitDisp()1231 	bool is64bitDisp() const { return mode_ == M_64bitDisp; } // for moffset
isBroadcast()1232 	bool isBroadcast() const { return broadcast_; }
getLabel()1233 	const Label* getLabel() const { return label_; }
1234 	bool operator==(const Address& rhs) const
1235 	{
1236 		return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && broadcast_ == rhs.broadcast_;
1237 	}
1238 	bool operator!=(const Address& rhs) const { return !operator==(rhs); }
isVsib()1239 	bool isVsib() const { return e_.isVsib(); }
1240 private:
1241 	RegExp e_;
1242 	const Label* label_;
1243 	Mode mode_;
1244 	bool broadcast_;
1245 };
1246 
getAddress()1247 inline const Address& Operand::getAddress() const
1248 {
1249 	assert(isMEM());
1250 	return static_cast<const Address&>(*this);
1251 }
1252 
1253 inline bool Operand::operator==(const Operand& rhs) const
1254 {
1255 	if (isMEM() && rhs.isMEM()) return this->getAddress() == rhs.getAddress();
1256 	return isEqualIfNotInherited(rhs);
1257 }
1258 
1259 class AddressFrame {
1260 	void operator=(const AddressFrame&);
1261 	AddressFrame(const AddressFrame&);
1262 public:
1263 	const uint32_t bit_;
1264 	const bool broadcast_;
bit_(bit)1265 	explicit XBYAK_CONSTEXPR AddressFrame(uint32_t bit, bool broadcast = false) : bit_(bit), broadcast_(broadcast) { }
1266 	Address operator[](const RegExp& e) const
1267 	{
1268 		return Address(bit_, broadcast_, e);
1269 	}
1270 	Address operator[](const void *disp) const
1271 	{
1272 		return Address(bit_, broadcast_, RegExp(reinterpret_cast<size_t>(disp)));
1273 	}
1274 #ifdef XBYAK64
1275 	Address operator[](uint64_t disp) const { return Address(disp); }
1276 	Address operator[](const RegRip& addr) const { return Address(bit_, broadcast_, addr); }
1277 #endif
1278 };
1279 
1280 struct JmpLabel {
1281 	size_t endOfJmp; /* offset from top to the end address of jmp */
1282 	int jmpSize;
1283 	inner::LabelMode mode;
1284 	size_t disp; // disp for [rip + disp]
1285 	explicit JmpLabel(size_t endOfJmp = 0, int jmpSize = 0, inner::LabelMode mode = inner::LasIs, size_t disp = 0)
endOfJmpJmpLabel1286 		: endOfJmp(endOfJmp), jmpSize(jmpSize), mode(mode), disp(disp)
1287 	{
1288 	}
1289 };
1290 
1291 class LabelManager;
1292 
1293 class Label {
1294 	mutable LabelManager *mgr;
1295 	mutable int id;
1296 	friend class LabelManager;
1297 public:
Label()1298 	Label() : mgr(0), id(0) {}
1299 	Label(const Label& rhs);
1300 	Label& operator=(const Label& rhs);
1301 	~Label();
clear()1302 	void clear() { mgr = 0; id = 0; }
getId()1303 	int getId() const { return id; }
1304 	const uint8_t *getAddress() const;
1305 
1306 	// backward compatibility
toStr(int num)1307 	static inline std::string toStr(int num)
1308 	{
1309 		char buf[16];
1310 #if defined(_MSC_VER) && (_MSC_VER < 1900)
1311 		_snprintf_s
1312 #else
1313 		snprintf
1314 #endif
1315 		(buf, sizeof(buf), ".%08x", num);
1316 		return buf;
1317 	}
1318 };
1319 
1320 class LabelManager {
1321 	// for string label
1322 	struct SlabelVal {
1323 		size_t offset;
SlabelValSlabelVal1324 		SlabelVal(size_t offset) : offset(offset) {}
1325 	};
1326 	typedef XBYAK_STD_UNORDERED_MAP<std::string, SlabelVal> SlabelDefList;
1327 	typedef XBYAK_STD_UNORDERED_MULTIMAP<std::string, const JmpLabel> SlabelUndefList;
1328 	struct SlabelState {
1329 		SlabelDefList defList;
1330 		SlabelUndefList undefList;
1331 	};
1332 	typedef std::list<SlabelState> StateList;
1333 	// for Label class
1334 	struct ClabelVal {
offsetClabelVal1335 		ClabelVal(size_t offset = 0) : offset(offset), refCount(1) {}
1336 		size_t offset;
1337 		int refCount;
1338 	};
1339 	typedef XBYAK_STD_UNORDERED_MAP<int, ClabelVal> ClabelDefList;
1340 	typedef XBYAK_STD_UNORDERED_MULTIMAP<int, const JmpLabel> ClabelUndefList;
1341 	typedef XBYAK_STD_UNORDERED_SET<Label*> LabelPtrList;
1342 
1343 	CodeArray *base_;
1344 	// global : stateList_.front(), local : stateList_.back()
1345 	StateList stateList_;
1346 	mutable int labelId_;
1347 	ClabelDefList clabelDefList_;
1348 	ClabelUndefList clabelUndefList_;
1349 	LabelPtrList labelPtrList_;
1350 
getId(const Label & label)1351 	int getId(const Label& label) const
1352 	{
1353 		if (label.id == 0) label.id = labelId_++;
1354 		return label.id;
1355 	}
1356 	template<class DefList, class UndefList, class T>
define_inner(DefList & defList,UndefList & undefList,const T & labelId,size_t addrOffset)1357 	void define_inner(DefList& defList, UndefList& undefList, const T& labelId, size_t addrOffset)
1358 	{
1359 		// add label
1360 		typename DefList::value_type item(labelId, addrOffset);
1361 		std::pair<typename DefList::iterator, bool> ret = defList.insert(item);
1362 		if (!ret.second) XBYAK_THROW(ERR_LABEL_IS_REDEFINED)
1363 		// search undefined label
1364 		for (;;) {
1365 			typename UndefList::iterator itr = undefList.find(labelId);
1366 			if (itr == undefList.end()) break;
1367 			const JmpLabel *jmp = &itr->second;
1368 			const size_t offset = jmp->endOfJmp - jmp->jmpSize;
1369 			size_t disp;
1370 			if (jmp->mode == inner::LaddTop) {
1371 				disp = addrOffset;
1372 			} else if (jmp->mode == inner::Labs) {
1373 				disp = size_t(base_->getCurr());
1374 			} else {
1375 				disp = addrOffset - jmp->endOfJmp + jmp->disp;
1376 #ifdef XBYAK64
1377 				if (jmp->jmpSize <= 4 && !inner::IsInInt32(disp)) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG)
1378 #endif
1379 				if (jmp->jmpSize == 1 && !inner::IsInDisp8((uint32_t)disp)) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR)
1380 			}
1381 			if (base_->isAutoGrow()) {
1382 				base_->save(offset, disp, jmp->jmpSize, jmp->mode);
1383 			} else {
1384 				base_->rewrite(offset, disp, jmp->jmpSize);
1385 			}
1386 			undefList.erase(itr);
1387 		}
1388 	}
1389 	template<class DefList, class T>
getOffset_inner(const DefList & defList,size_t * offset,const T & label)1390 	bool getOffset_inner(const DefList& defList, size_t *offset, const T& label) const
1391 	{
1392 		typename DefList::const_iterator i = defList.find(label);
1393 		if (i == defList.end()) return false;
1394 		*offset = i->second.offset;
1395 		return true;
1396 	}
1397 	friend class Label;
incRefCount(int id,Label * label)1398 	void incRefCount(int id, Label *label)
1399 	{
1400 		clabelDefList_[id].refCount++;
1401 		labelPtrList_.insert(label);
1402 	}
decRefCount(int id,Label * label)1403 	void decRefCount(int id, Label *label)
1404 	{
1405 		labelPtrList_.erase(label);
1406 		ClabelDefList::iterator i = clabelDefList_.find(id);
1407 		if (i == clabelDefList_.end()) return;
1408 		if (i->second.refCount == 1) {
1409 			clabelDefList_.erase(id);
1410 		} else {
1411 			--i->second.refCount;
1412 		}
1413 	}
1414 	template<class T>
hasUndefinedLabel_inner(const T & list)1415 	bool hasUndefinedLabel_inner(const T& list) const
1416 	{
1417 #ifndef NDEBUG
1418 		for (typename T::const_iterator i = list.begin(); i != list.end(); ++i) {
1419 			std::cerr << "undefined label:" << i->first << std::endl;
1420 		}
1421 #endif
1422 		return !list.empty();
1423 	}
1424 	// detach all labels linked to LabelManager
resetLabelPtrList()1425 	void resetLabelPtrList()
1426 	{
1427 		for (LabelPtrList::iterator i = labelPtrList_.begin(), ie = labelPtrList_.end(); i != ie; ++i) {
1428 			(*i)->clear();
1429 		}
1430 		labelPtrList_.clear();
1431 	}
1432 public:
LabelManager()1433 	LabelManager()
1434 	{
1435 		reset();
1436 	}
~LabelManager()1437 	~LabelManager()
1438 	{
1439 		resetLabelPtrList();
1440 	}
reset()1441 	void reset()
1442 	{
1443 		base_ = 0;
1444 		labelId_ = 1;
1445 		stateList_.clear();
1446 		stateList_.push_back(SlabelState());
1447 		stateList_.push_back(SlabelState());
1448 		clabelDefList_.clear();
1449 		clabelUndefList_.clear();
1450 		resetLabelPtrList();
1451 		ClearError();
1452 	}
enterLocal()1453 	void enterLocal()
1454 	{
1455 		stateList_.push_back(SlabelState());
1456 	}
leaveLocal()1457 	void leaveLocal()
1458 	{
1459 		if (stateList_.size() <= 2) XBYAK_THROW(ERR_UNDER_LOCAL_LABEL)
1460 		if (hasUndefinedLabel_inner(stateList_.back().undefList)) XBYAK_THROW(ERR_LABEL_IS_NOT_FOUND)
1461 		stateList_.pop_back();
1462 	}
set(CodeArray * base)1463 	void set(CodeArray *base) { base_ = base; }
defineSlabel(std::string label)1464 	void defineSlabel(std::string label)
1465 	{
1466 		if (label == "@b" || label == "@f") XBYAK_THROW(ERR_BAD_LABEL_STR)
1467 		if (label == "@@") {
1468 			SlabelDefList& defList = stateList_.front().defList;
1469 			SlabelDefList::iterator i = defList.find("@f");
1470 			if (i != defList.end()) {
1471 				defList.erase(i);
1472 				label = "@b";
1473 			} else {
1474 				i = defList.find("@b");
1475 				if (i != defList.end()) {
1476 					defList.erase(i);
1477 				}
1478 				label = "@f";
1479 			}
1480 		}
1481 		SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
1482 		define_inner(st.defList, st.undefList, label, base_->getSize());
1483 	}
defineClabel(Label & label)1484 	void defineClabel(Label& label)
1485 	{
1486 		define_inner(clabelDefList_, clabelUndefList_, getId(label), base_->getSize());
1487 		label.mgr = this;
1488 		labelPtrList_.insert(&label);
1489 	}
assign(Label & dst,const Label & src)1490 	void assign(Label& dst, const Label& src)
1491 	{
1492 		ClabelDefList::const_iterator i = clabelDefList_.find(src.id);
1493 		if (i == clabelDefList_.end()) XBYAK_THROW(ERR_LABEL_ISNOT_SET_BY_L)
1494 		define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset);
1495 		dst.mgr = this;
1496 		labelPtrList_.insert(&dst);
1497 	}
getOffset(size_t * offset,std::string & label)1498 	bool getOffset(size_t *offset, std::string& label) const
1499 	{
1500 		const SlabelDefList& defList = stateList_.front().defList;
1501 		if (label == "@b") {
1502 			if (defList.find("@f") != defList.end()) {
1503 				label = "@f";
1504 			} else if (defList.find("@b") == defList.end()) {
1505 				XBYAK_THROW_RET(ERR_LABEL_IS_NOT_FOUND, false)
1506 			}
1507 		} else if (label == "@f") {
1508 			if (defList.find("@f") != defList.end()) {
1509 				label = "@b";
1510 			}
1511 		}
1512 		const SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
1513 		return getOffset_inner(st.defList, offset, label);
1514 	}
getOffset(size_t * offset,const Label & label)1515 	bool getOffset(size_t *offset, const Label& label) const
1516 	{
1517 		return getOffset_inner(clabelDefList_, offset, getId(label));
1518 	}
addUndefinedLabel(const std::string & label,const JmpLabel & jmp)1519 	void addUndefinedLabel(const std::string& label, const JmpLabel& jmp)
1520 	{
1521 		SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
1522 		st.undefList.insert(SlabelUndefList::value_type(label, jmp));
1523 	}
addUndefinedLabel(const Label & label,const JmpLabel & jmp)1524 	void addUndefinedLabel(const Label& label, const JmpLabel& jmp)
1525 	{
1526 		clabelUndefList_.insert(ClabelUndefList::value_type(label.id, jmp));
1527 	}
hasUndefSlabel()1528 	bool hasUndefSlabel() const
1529 	{
1530 		for (StateList::const_iterator i = stateList_.begin(), ie = stateList_.end(); i != ie; ++i) {
1531 			if (hasUndefinedLabel_inner(i->undefList)) return true;
1532 		}
1533 		return false;
1534 	}
hasUndefClabel()1535 	bool hasUndefClabel() const { return hasUndefinedLabel_inner(clabelUndefList_); }
getCode()1536 	const uint8_t *getCode() const { return base_->getCode(); }
isReady()1537 	bool isReady() const { return !base_->isAutoGrow() || base_->isCalledCalcJmpAddress(); }
1538 };
1539 
Label(const Label & rhs)1540 inline Label::Label(const Label& rhs)
1541 {
1542 	id = rhs.id;
1543 	mgr = rhs.mgr;
1544 	if (mgr) mgr->incRefCount(id, this);
1545 }
1546 inline Label& Label::operator=(const Label& rhs)
1547 {
1548 	if (id) XBYAK_THROW_RET(ERR_LABEL_IS_ALREADY_SET_BY_L, *this)
1549 	id = rhs.id;
1550 	mgr = rhs.mgr;
1551 	if (mgr) mgr->incRefCount(id, this);
1552 	return *this;
1553 }
~Label()1554 inline Label::~Label()
1555 {
1556 	if (id && mgr) mgr->decRefCount(id, this);
1557 }
getAddress()1558 inline const uint8_t* Label::getAddress() const
1559 {
1560 	if (mgr == 0 || !mgr->isReady()) return 0;
1561 	size_t offset;
1562 	if (!mgr->getOffset(&offset, *this)) return 0;
1563 	return mgr->getCode() + offset;
1564 }
1565 
1566 typedef enum {
1567 	DefaultEncoding,
1568 	VexEncoding,
1569 	EvexEncoding
1570 } PreferredEncoding;
1571 
1572 class CodeGenerator : public CodeArray {
1573 public:
1574 	enum LabelType {
1575 		T_SHORT,
1576 		T_NEAR,
1577 		T_FAR, // far jump
1578 		T_AUTO // T_SHORT if possible
1579 	};
1580 private:
1581 	CodeGenerator operator=(const CodeGenerator&); // don't call
1582 #ifdef XBYAK64
1583 	enum { i32e = 32 | 64, BIT = 64 };
1584 	static const uint64_t dummyAddr = uint64_t(0x1122334455667788ull);
1585 	typedef Reg64 NativeReg;
1586 #else
1587 	enum { i32e = 32, BIT = 32 };
1588 	static const size_t dummyAddr = 0x12345678;
1589 	typedef Reg32 NativeReg;
1590 #endif
1591 	// (XMM, XMM|MEM)
isXMM_XMMorMEM(const Operand & op1,const Operand & op2)1592 	static inline bool isXMM_XMMorMEM(const Operand& op1, const Operand& op2)
1593 	{
1594 		return op1.isXMM() && (op2.isXMM() || op2.isMEM());
1595 	}
1596 	// (MMX, MMX|MEM) or (XMM, XMM|MEM)
isXMMorMMX_MEM(const Operand & op1,const Operand & op2)1597 	static inline bool isXMMorMMX_MEM(const Operand& op1, const Operand& op2)
1598 	{
1599 		return (op1.isMMX() && (op2.isMMX() || op2.isMEM())) || isXMM_XMMorMEM(op1, op2);
1600 	}
1601 	// (XMM, MMX|MEM)
isXMM_MMXorMEM(const Operand & op1,const Operand & op2)1602 	static inline bool isXMM_MMXorMEM(const Operand& op1, const Operand& op2)
1603 	{
1604 		return op1.isXMM() && (op2.isMMX() || op2.isMEM());
1605 	}
1606 	// (MMX, XMM|MEM)
isMMX_XMMorMEM(const Operand & op1,const Operand & op2)1607 	static inline bool isMMX_XMMorMEM(const Operand& op1, const Operand& op2)
1608 	{
1609 		return op1.isMMX() && (op2.isXMM() || op2.isMEM());
1610 	}
1611 	// (XMM, REG32|MEM)
isXMM_REG32orMEM(const Operand & op1,const Operand & op2)1612 	static inline bool isXMM_REG32orMEM(const Operand& op1, const Operand& op2)
1613 	{
1614 		return op1.isXMM() && (op2.isREG(i32e) || op2.isMEM());
1615 	}
1616 	// (REG32, XMM|MEM)
isREG32_XMMorMEM(const Operand & op1,const Operand & op2)1617 	static inline bool isREG32_XMMorMEM(const Operand& op1, const Operand& op2)
1618 	{
1619 		return op1.isREG(i32e) && (op2.isXMM() || op2.isMEM());
1620 	}
1621 	// (REG32, REG32|MEM)
isREG32_REG32orMEM(const Operand & op1,const Operand & op2)1622 	static inline bool isREG32_REG32orMEM(const Operand& op1, const Operand& op2)
1623 	{
1624 		return op1.isREG(i32e) && ((op2.isREG(i32e) && op1.getBit() == op2.getBit()) || op2.isMEM());
1625 	}
1626 	void rex(const Operand& op1, const Operand& op2 = Operand())
1627 	{
1628 		uint8_t rex = 0;
1629 		const Operand *p1 = &op1, *p2 = &op2;
1630 		if (p1->isMEM()) std::swap(p1, p2);
1631 		if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION)
1632 		if (p2->isMEM()) {
1633 			const Address& addr = p2->getAddress();
1634 			if (BIT == 64 && addr.is32bit()) db(0x67);
1635 			rex = addr.getRex() | p1->getReg().getRex();
1636 		} else {
1637 			// ModRM(reg, base);
1638 			rex = op2.getReg().getRex(op1.getReg());
1639 		}
1640 		// except movsx(16bit, 32/64bit)
1641 		if ((op1.isBit(16) && !op2.isBit(i32e)) || (op2.isBit(16) && !op1.isBit(i32e))) db(0x66);
1642 		if (rex) db(rex);
1643 	}
1644 	enum AVXtype {
1645 		// low 3 bit
1646 		T_N1 = 1,
1647 		T_N2 = 2,
1648 		T_N4 = 3,
1649 		T_N8 = 4,
1650 		T_N16 = 5,
1651 		T_N32 = 6,
1652 		T_NX_MASK = 7,
1653 		//
1654 		T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
1655 		T_DUP = 1 << 4, // N = (8, 32, 64)
1656 		T_66 = 1 << 5, // pp = 1
1657 		T_F3 = 1 << 6, // pp = 2
1658 		T_F2 = T_66 | T_F3, // pp = 3
1659 		T_ER_R = 1 << 7, // reg{er}
1660 		T_0F = 1 << 8,
1661 		T_0F38 = 1 << 9,
1662 		T_0F3A = 1 << 10,
1663 		T_L0 = 1 << 11,
1664 		T_L1 = 1 << 12,
1665 		T_W0 = 1 << 13,
1666 		T_W1 = 1 << 14,
1667 		T_EW0 = 1 << 15,
1668 		T_EW1 = 1 << 16,
1669 		T_YMM = 1 << 17, // support YMM, ZMM
1670 		T_EVEX = 1 << 18,
1671 		T_ER_X = 1 << 19, // xmm{er}
1672 		T_ER_Y = 1 << 20, // ymm{er}
1673 		T_ER_Z = 1 << 21, // zmm{er}
1674 		T_SAE_X = 1 << 22, // xmm{sae}
1675 		T_SAE_Y = 1 << 23, // ymm{sae}
1676 		T_SAE_Z = 1 << 24, // zmm{sae}
1677 		T_MUST_EVEX = 1 << 25, // contains T_EVEX
1678 		T_B32 = 1 << 26, // m32bcst
1679 		T_B64 = 1 << 27, // m64bcst
1680 		T_B16 = T_B32 | T_B64, // m16bcst (Be careful)
1681 		T_M_K = 1 << 28, // mem{k}
1682 		T_VSIB = 1 << 29,
1683 		T_MEM_EVEX = 1 << 30, // use evex if mem
1684 		T_FP16 = 1 << 31, // avx512-fp16
1685 		T_MAP5 = T_FP16 | T_0F,
1686 		T_MAP6 = T_FP16 | T_0F38,
1687 		T_XXX
1688 	};
1689 	// T_66 = 1, T_F3 = 2, T_F2 = 3
getPP(int type)1690 	uint32_t getPP(int type) const { return (type >> 5) & 3; }
1691 	void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
1692 	{
1693 		int w = (type & T_W1) ? 1 : 0;
1694 		bool is256 = (type & T_L1) ? true : (type & T_L0) ? false : reg.isYMM();
1695 		bool r = reg.isExtIdx();
1696 		bool b = base.isExtIdx();
1697 		int idx = v ? v->getIdx() : 0;
1698 		if ((idx | reg.getIdx() | base.getIdx()) >= 16) XBYAK_THROW(ERR_BAD_COMBINATION)
1699 		uint32_t pp = getPP(type);
1700 		uint32_t vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp;
1701 		if (!b && !x && !w && (type & T_0F)) {
1702 			db(0xC5); db((r ? 0 : 0x80) | vvvv);
1703 		} else {
1704 			uint32_t mmmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
1705 			db(0xC4); db((r ? 0 : 0x80) | (x ? 0 : 0x40) | (b ? 0 : 0x20) | mmmm); db((w << 7) | vvvv);
1706 		}
1707 		db(code);
1708 	}
verifySAE(const Reg & r,int type)1709 	void verifySAE(const Reg& r, int type) const
1710 	{
1711 		if (((type & T_SAE_X) && r.isXMM()) || ((type & T_SAE_Y) && r.isYMM()) || ((type & T_SAE_Z) && r.isZMM())) return;
1712 		XBYAK_THROW(ERR_SAE_IS_INVALID)
1713 	}
verifyER(const Reg & r,int type)1714 	void verifyER(const Reg& r, int type) const
1715 	{
1716 		if ((type & T_ER_R) && r.isREG(32|64)) return;
1717 		if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return;
1718 		XBYAK_THROW(ERR_ER_IS_INVALID)
1719 	}
1720 	// (a, b, c) contains non zero two or three values then err
verifyDuplicate(int a,int b,int c,int err)1721 	int verifyDuplicate(int a, int b, int c, int err)
1722 	{
1723 		int v = a | b | c;
1724 		if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) XBYAK_THROW_RET(err, 0)
1725 		return v;
1726 	}
1727 	int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32_t VL = 0, bool Hi16Vidx = false)
1728 	{
1729 		if (!(type & (T_EVEX | T_MUST_EVEX))) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, 0)
1730 		int w = (type & T_EW1) ? 1 : 0;
1731 		uint32_t mmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
1732 		if (type & T_FP16) mmm |= 4;
1733 		uint32_t pp = getPP(type);
1734 		int idx = v ? v->getIdx() : 0;
1735 		uint32_t vvvv = ~idx;
1736 
1737 		bool R = !reg.isExtIdx();
1738 		bool X = x ? false : !base.isExtIdx2();
1739 		bool B = !base.isExtIdx();
1740 		bool Rp = !reg.isExtIdx2();
1741 		int LL;
1742 		int rounding = verifyDuplicate(reg.getRounding(), base.getRounding(), v ? v->getRounding() : 0, ERR_ROUNDING_IS_ALREADY_SET);
1743 		int disp8N = 1;
1744 		if (rounding) {
1745 			if (rounding == EvexModifierRounding::T_SAE) {
1746 				verifySAE(base, type); LL = 0;
1747 			} else {
1748 				verifyER(base, type); LL = rounding - 1;
1749 			}
1750 			b = true;
1751 		} else {
1752 			if (v) VL = (std::max)(VL, v->getBit());
1753 			VL = (std::max)((std::max)(reg.getBit(), base.getBit()), VL);
1754 			LL = (VL == 512) ? 2 : (VL == 256) ? 1 : 0;
1755 			if (b) {
1756 				disp8N = ((type & T_B16) == T_B16) ? 2 : (type & T_B32) ? 4 : 8;
1757 			} else if (type & T_DUP) {
1758 				disp8N = VL == 128 ? 8 : VL == 256 ? 32 : 64;
1759 			} else {
1760 				if ((type & (T_NX_MASK | T_N_VL)) == 0) {
1761 					type |= T_N16 | T_N_VL; // default
1762 				}
1763 				int low = type & T_NX_MASK;
1764 				if (low > 0) {
1765 					disp8N = 1 << (low - 1);
1766 					if (type & T_N_VL) disp8N *= (VL == 512 ? 4 : VL == 256 ? 2 : 1);
1767 				}
1768 			}
1769 		}
1770 		bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx);
1771 		bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false);
1772 		if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
1773 		if (aaa == 0) z = 0; // clear T_z if mask is not set
1774 		db(0x62);
1775 		db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | mmm);
1776 		db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3));
1777 		db((z ? 0x80 : 0) | ((LL & 3) << 5) | (b ? 0x10 : 0) | (Vp ? 8 : 0) | (aaa & 7));
1778 		db(code);
1779 		return disp8N;
1780 	}
setModRM(int mod,int r1,int r2)1781 	void setModRM(int mod, int r1, int r2)
1782 	{
1783 		db(static_cast<uint8_t>((mod << 6) | ((r1 & 7) << 3) | (r2 & 7)));
1784 	}
1785 	void setSIB(const RegExp& e, int reg, int disp8N = 0)
1786 	{
1787 		uint64_t disp64 = e.getDisp();
1788 #ifdef XBYAK64
1789 		uint64_t high = disp64 >> 32;
1790 		if (high != 0 && high != 0xFFFFFFFF) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG)
1791 #endif
1792 		uint32_t disp = static_cast<uint32_t>(disp64);
1793 		const Reg& base = e.getBase();
1794 		const Reg& index = e.getIndex();
1795 		const int baseIdx = base.getIdx();
1796 		const int baseBit = base.getBit();
1797 		const int indexBit = index.getBit();
1798 		enum {
1799 			mod00 = 0, mod01 = 1, mod10 = 2
1800 		};
1801 		int mod = mod10; // disp32
1802 		if (!baseBit || ((baseIdx & 7) != Operand::EBP && disp == 0)) {
1803 			mod = mod00;
1804 		} else {
1805 			if (disp8N == 0) {
1806 				if (inner::IsInDisp8(disp)) {
1807 					mod = mod01;
1808 				}
1809 			} else {
1810 				// disp must be casted to signed
1811 				uint32_t t = static_cast<uint32_t>(static_cast<int>(disp) / disp8N);
1812 				if ((disp % disp8N) == 0 && inner::IsInDisp8(t)) {
1813 					disp = t;
1814 					mod = mod01;
1815 				}
1816 			}
1817 		}
1818 		const int newBaseIdx = baseBit ? (baseIdx & 7) : Operand::EBP;
1819 		/* ModR/M = [2:3:3] = [Mod:reg/code:R/M] */
1820 		bool hasSIB = indexBit || (baseIdx & 7) == Operand::ESP;
1821 #ifdef XBYAK64
1822 		if (!baseBit && !indexBit) hasSIB = true;
1823 #endif
1824 		if (hasSIB) {
1825 			setModRM(mod, reg, Operand::ESP);
1826 			/* SIB = [2:3:3] = [SS:index:base(=rm)] */
1827 			const int idx = indexBit ? (index.getIdx() & 7) : Operand::ESP;
1828 			const int scale = e.getScale();
1829 			const int SS = (scale == 8) ? 3 : (scale == 4) ? 2 : (scale == 2) ? 1 : 0;
1830 			setModRM(SS, idx, newBaseIdx);
1831 		} else {
1832 			setModRM(mod, reg, newBaseIdx);
1833 		}
1834 		if (mod == mod01) {
1835 			db(disp);
1836 		} else if (mod == mod10 || (mod == mod00 && !baseBit)) {
1837 			dd(disp);
1838 		}
1839 	}
1840 	LabelManager labelMgr_;
isInDisp16(uint32_t x)1841 	bool isInDisp16(uint32_t x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; }
1842 	void opModR(const Reg& reg1, const Reg& reg2, int code0, int code1 = NONE, int code2 = NONE)
1843 	{
1844 		rex(reg2, reg1);
1845 		db(code0 | (reg1.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
1846 		setModRM(3, reg1.getIdx(), reg2.getIdx());
1847 	}
1848 	void opModM(const Address& addr, const Reg& reg, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0)
1849 	{
1850 		if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
1851 		rex(addr, reg);
1852 		db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
1853 		opAddr(addr, reg.getIdx(), immSize);
1854 	}
1855 	void opLoadSeg(const Address& addr, const Reg& reg, int code0, int code1 = NONE)
1856 	{
1857 		if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
1858 		if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
1859 		rex(addr, reg);
1860 		db(code0); if (code1 != NONE) db(code1);
1861 		opAddr(addr, reg.getIdx());
1862 	}
opMIB(const Address & addr,const Reg & reg,int code0,int code1)1863 	void opMIB(const Address& addr, const Reg& reg, int code0, int code1)
1864 	{
1865 		if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
1866 		if (addr.getMode() != Address::M_ModRM) XBYAK_THROW(ERR_INVALID_MIB_ADDRESS)
1867 		if (BIT == 64 && addr.is32bit()) db(0x67);
1868 		const RegExp& regExp = addr.getRegExp(false);
1869 		uint8_t rex = regExp.getRex();
1870 		if (rex) db(rex);
1871 		db(code0); db(code1);
1872 		setSIB(regExp, reg.getIdx());
1873 	}
makeJmp(uint32_t disp,LabelType type,uint8_t shortCode,uint8_t longCode,uint8_t longPref)1874 	void makeJmp(uint32_t disp, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref)
1875 	{
1876 		const int shortJmpSize = 2;
1877 		const int longHeaderSize = longPref ? 2 : 1;
1878 		const int longJmpSize = longHeaderSize + 4;
1879 		if (type != T_NEAR && inner::IsInDisp8(disp - shortJmpSize)) {
1880 			db(shortCode); db(disp - shortJmpSize);
1881 		} else {
1882 			if (type == T_SHORT) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR)
1883 			if (longPref) db(longPref);
1884 			db(longCode); dd(disp - longJmpSize);
1885 		}
1886 	}
isNEAR(LabelType type)1887 	bool isNEAR(LabelType type) const { return type == T_NEAR || (type == T_AUTO && isDefaultJmpNEAR_); }
1888 	template<class T>
opJmp(T & label,LabelType type,uint8_t shortCode,uint8_t longCode,uint8_t longPref)1889 	void opJmp(T& label, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref)
1890 	{
1891 		if (type == T_FAR) XBYAK_THROW(ERR_NOT_SUPPORTED)
1892 		if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); /* avoid splitting code of jmp */
1893 		size_t offset = 0;
1894 		if (labelMgr_.getOffset(&offset, label)) { /* label exists */
1895 			makeJmp(inner::VerifyInInt32(offset - size_), type, shortCode, longCode, longPref);
1896 		} else {
1897 			int jmpSize = 0;
1898 			if (isNEAR(type)) {
1899 				jmpSize = 4;
1900 				if (longPref) db(longPref);
1901 				db(longCode); dd(0);
1902 			} else {
1903 				jmpSize = 1;
1904 				db(shortCode); db(0);
1905 			}
1906 			JmpLabel jmp(size_, jmpSize, inner::LasIs);
1907 			labelMgr_.addUndefinedLabel(label, jmp);
1908 		}
1909 	}
1910 	void opJmpAbs(const void *addr, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref = 0)
1911 	{
1912 		if (type == T_FAR) XBYAK_THROW(ERR_NOT_SUPPORTED)
1913 		if (isAutoGrow()) {
1914 			if (!isNEAR(type)) XBYAK_THROW(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW)
1915 			if (size_ + 16 >= maxSize_) growMemory();
1916 			if (longPref) db(longPref);
1917 			db(longCode);
1918 			dd(0);
1919 			save(size_ - 4, size_t(addr) - size_, 4, inner::Labs);
1920 		} else {
1921 			makeJmp(inner::VerifyInInt32(reinterpret_cast<const uint8_t*>(addr) - getCurr()), type, shortCode, longCode, longPref);
1922 		}
1923 
1924 	}
opJmpOp(const Operand & op,LabelType type,int ext)1925 	void opJmpOp(const Operand& op, LabelType type, int ext)
1926 	{
1927 		const int bit = 16|i32e;
1928 		if (type == T_FAR) {
1929 			if (!op.isMEM(bit)) XBYAK_THROW(ERR_NOT_SUPPORTED)
1930 			opR_ModM(op, bit, ext + 1, 0xFF, NONE, NONE, false);
1931 		} else {
1932 			opR_ModM(op, bit, ext, 0xFF, NONE, NONE, true);
1933 		}
1934 	}
1935 	// reg is reg field of ModRM
1936 	// immSize is the size for immediate value
1937 	// disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement
1938 	void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0, bool permitVisb = false)
1939 	{
1940 		if (!permitVisb && addr.isVsib()) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
1941 		if (addr.getMode() == Address::M_ModRM) {
1942 			setSIB(addr.getRegExp(), reg, disp8N);
1943 		} else if (addr.getMode() == Address::M_rip || addr.getMode() == Address::M_ripAddr) {
1944 			setModRM(0, reg, 5);
1945 			if (addr.getLabel()) { // [rip + Label]
1946 				putL_inner(*addr.getLabel(), true, addr.getDisp() - immSize);
1947 			} else {
1948 				size_t disp = addr.getDisp();
1949 				if (addr.getMode() == Address::M_ripAddr) {
1950 					if (isAutoGrow()) XBYAK_THROW(ERR_INVALID_RIP_IN_AUTO_GROW)
1951 					disp -= (size_t)getCurr() + 4 + immSize;
1952 				}
1953 				dd(inner::VerifyInInt32(disp));
1954 			}
1955 		}
1956 	}
1957 	/* preCode is for SSSE3/SSE4 */
1958 	void opGen(const Operand& reg, const Operand& op, int code, int pref, bool isValid(const Operand&, const Operand&), int imm8 = NONE, int preCode = NONE)
1959 	{
1960 		if (isValid && !isValid(reg, op)) XBYAK_THROW(ERR_BAD_COMBINATION)
1961 		if (pref != NONE) db(pref);
1962 		if (op.isMEM()) {
1963 			opModM(op.getAddress(), reg.getReg(), 0x0F, preCode, code, (imm8 != NONE) ? 1 : 0);
1964 		} else {
1965 			opModR(reg.getReg(), op.getReg(), 0x0F, preCode, code);
1966 		}
1967 		if (imm8 != NONE) db(imm8);
1968 	}
opMMX_IMM(const Mmx & mmx,int imm8,int code,int ext)1969 	void opMMX_IMM(const Mmx& mmx, int imm8, int code, int ext)
1970 	{
1971 		if (mmx.isXMM()) db(0x66);
1972 		opModR(Reg32(ext), mmx, 0x0F, code);
1973 		db(imm8);
1974 	}
1975 	void opMMX(const Mmx& mmx, const Operand& op, int code, int pref = 0x66, int imm8 = NONE, int preCode = NONE)
1976 	{
1977 		opGen(mmx, op, code, mmx.isXMM() ? pref : NONE, isXMMorMMX_MEM, imm8, preCode);
1978 	}
opMovXMM(const Operand & op1,const Operand & op2,int code,int pref)1979 	void opMovXMM(const Operand& op1, const Operand& op2, int code, int pref)
1980 	{
1981 		if (pref != NONE) db(pref);
1982 		if (op1.isXMM() && op2.isMEM()) {
1983 			opModM(op2.getAddress(), op1.getReg(), 0x0F, code);
1984 		} else if (op1.isMEM() && op2.isXMM()) {
1985 			opModM(op1.getAddress(), op2.getReg(), 0x0F, code | 1);
1986 		} else {
1987 			XBYAK_THROW(ERR_BAD_COMBINATION)
1988 		}
1989 	}
1990 	void opExt(const Operand& op, const Mmx& mmx, int code, int imm, bool hasMMX2 = false)
1991 	{
1992 		if (hasMMX2 && op.isREG(i32e)) { /* pextrw is special */
1993 			if (mmx.isXMM()) db(0x66);
1994 			opModR(op.getReg(), mmx, 0x0F, 0xC5); db(imm);
1995 		} else {
1996 			opGen(mmx, op, code, 0x66, isXMM_REG32orMEM, imm, 0x3A);
1997 		}
1998 	}
1999 	void opR_ModM(const Operand& op, int bit, int ext, int code0, int code1 = NONE, int code2 = NONE, bool disableRex = false, int immSize = 0)
2000 	{
2001 		int opBit = op.getBit();
2002 		if (disableRex && opBit == 64) opBit = 32;
2003 		if (op.isREG(bit)) {
2004 			opModR(Reg(ext, Operand::REG, opBit), op.getReg().changeBit(opBit), code0, code1, code2);
2005 		} else if (op.isMEM()) {
2006 			opModM(op.getAddress(), Reg(ext, Operand::REG, opBit), code0, code1, code2, immSize);
2007 		} else {
2008 			XBYAK_THROW(ERR_BAD_COMBINATION)
2009 		}
2010 	}
opShift(const Operand & op,int imm,int ext)2011 	void opShift(const Operand& op, int imm, int ext)
2012 	{
2013 		verifyMemHasSize(op);
2014 		opR_ModM(op, 0, ext, (0xC0 | ((imm == 1 ? 1 : 0) << 4)), NONE, NONE, false, (imm != 1) ? 1 : 0);
2015 		if (imm != 1) db(imm);
2016 	}
opShift(const Operand & op,const Reg8 & _cl,int ext)2017 	void opShift(const Operand& op, const Reg8& _cl, int ext)
2018 	{
2019 		if (_cl.getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION)
2020 		opR_ModM(op, 0, ext, 0xD2);
2021 	}
2022 	void opModRM(const Operand& op1, const Operand& op2, bool condR, bool condM, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0)
2023 	{
2024 		if (condR) {
2025 			opModR(op1.getReg(), op2.getReg(), code0, code1, code2);
2026 		} else if (condM) {
2027 			opModM(op2.getAddress(), op1.getReg(), code0, code1, code2, immSize);
2028 		} else {
2029 			XBYAK_THROW(ERR_BAD_COMBINATION)
2030 		}
2031 	}
2032 	void opShxd(const Operand& op, const Reg& reg, uint8_t imm, int code, const Reg8 *_cl = 0)
2033 	{
2034 		if (_cl && _cl->getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION)
2035 		opModRM(reg, op, (op.isREG(16 | i32e) && op.getBit() == reg.getBit()), op.isMEM() && (reg.isREG(16 | i32e)), 0x0F, code | (_cl ? 1 : 0), NONE, _cl ? 0 : 1);
2036 		if (!_cl) db(imm);
2037 	}
2038 	// (REG, REG|MEM), (MEM, REG)
opRM_RM(const Operand & op1,const Operand & op2,int code)2039 	void opRM_RM(const Operand& op1, const Operand& op2, int code)
2040 	{
2041 		if (op1.isREG() && op2.isMEM()) {
2042 			opModM(op2.getAddress(), op1.getReg(), code | 2);
2043 		} else {
2044 			opModRM(op2, op1, op1.isREG() && op1.getKind() == op2.getKind(), op1.isMEM() && op2.isREG(), code);
2045 		}
2046 	}
2047 	// (REG|MEM, IMM)
opRM_I(const Operand & op,uint32_t imm,int code,int ext)2048 	void opRM_I(const Operand& op, uint32_t imm, int code, int ext)
2049 	{
2050 		verifyMemHasSize(op);
2051 		uint32_t immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32;
2052 		if (op.isBit(8)) immBit = 8;
2053 		if (op.getBit() < immBit) XBYAK_THROW(ERR_IMM_IS_TOO_BIG)
2054 		if (op.isBit(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */
2055 		if (op.isREG() && op.getIdx() == 0 && (op.getBit() == immBit || (op.isBit(64) && immBit == 32))) { // rax, eax, ax, al
2056 			rex(op);
2057 			db(code | 4 | (immBit == 8 ? 0 : 1));
2058 		} else {
2059 			int tmp = immBit < (std::min)(op.getBit(), 32U) ? 2 : 0;
2060 			opR_ModM(op, 0, ext, 0x80 | tmp, NONE, NONE, false, immBit / 8);
2061 		}
2062 		db(imm, immBit / 8);
2063 	}
opIncDec(const Operand & op,int code,int ext)2064 	void opIncDec(const Operand& op, int code, int ext)
2065 	{
2066 		verifyMemHasSize(op);
2067 #ifndef XBYAK64
2068 		if (op.isREG() && !op.isBit(8)) {
2069 			rex(op); db(code | op.getIdx());
2070 			return;
2071 		}
2072 #endif
2073 		code = 0xFE;
2074 		if (op.isREG()) {
2075 			opModR(Reg(ext, Operand::REG, op.getBit()), op.getReg(), code);
2076 		} else {
2077 			opModM(op.getAddress(), Reg(ext, Operand::REG, op.getBit()), code);
2078 		}
2079 	}
opPushPop(const Operand & op,int code,int ext,int alt)2080 	void opPushPop(const Operand& op, int code, int ext, int alt)
2081 	{
2082 		int bit = op.getBit();
2083 		if (bit == 16 || bit == BIT) {
2084 			if (bit == 16) db(0x66);
2085 			if (op.isREG()) {
2086 				if (op.getReg().getIdx() >= 8) db(0x41);
2087 				db(alt | (op.getIdx() & 7));
2088 				return;
2089 			}
2090 			if (op.isMEM()) {
2091 				opModM(op.getAddress(), Reg(ext, Operand::REG, 32), code);
2092 				return;
2093 			}
2094 		}
2095 		XBYAK_THROW(ERR_BAD_COMBINATION)
2096 	}
verifyMemHasSize(const Operand & op)2097 	void verifyMemHasSize(const Operand& op) const
2098 	{
2099 		if (op.isMEM() && op.getBit() == 0) XBYAK_THROW(ERR_MEM_SIZE_IS_NOT_SPECIFIED)
2100 	}
2101 	/*
2102 		mov(r, imm) = db(imm, mov_imm(r, imm))
2103 	*/
mov_imm(const Reg & reg,uint64_t imm)2104 	int mov_imm(const Reg& reg, uint64_t imm)
2105 	{
2106 		int bit = reg.getBit();
2107 		const int idx = reg.getIdx();
2108 		int code = 0xB0 | ((bit == 8 ? 0 : 1) << 3);
2109 		if (bit == 64 && (imm & ~uint64_t(0xffffffffu)) == 0) {
2110 			rex(Reg32(idx));
2111 			bit = 32;
2112 		} else {
2113 			rex(reg);
2114 			if (bit == 64 && inner::IsInInt32(imm)) {
2115 				db(0xC7);
2116 				code = 0xC0;
2117 				bit = 32;
2118 			}
2119 		}
2120 		db(code | (idx & 7));
2121 		return bit / 8;
2122 	}
2123 	template<class T>
2124 	void putL_inner(T& label, bool relative = false, size_t disp = 0)
2125 	{
2126 		const int jmpSize = relative ? 4 : (int)sizeof(size_t);
2127 		if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory();
2128 		size_t offset = 0;
2129 		if (labelMgr_.getOffset(&offset, label)) {
2130 			if (relative) {
2131 				db(inner::VerifyInInt32(offset + disp - size_ - jmpSize), jmpSize);
2132 			} else if (isAutoGrow()) {
2133 				db(uint64_t(0), jmpSize);
2134 				save(size_ - jmpSize, offset, jmpSize, inner::LaddTop);
2135 			} else {
2136 				db(size_t(top_) + offset, jmpSize);
2137 			}
2138 			return;
2139 		}
2140 		db(uint64_t(0), jmpSize);
2141 		JmpLabel jmp(size_, jmpSize, (relative ? inner::LasIs : isAutoGrow() ? inner::LaddTop : inner::Labs), disp);
2142 		labelMgr_.addUndefinedLabel(label, jmp);
2143 	}
opMovxx(const Reg & reg,const Operand & op,uint8_t code)2144 	void opMovxx(const Reg& reg, const Operand& op, uint8_t code)
2145 	{
2146 		if (op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION)
2147 		int w = op.isBit(16);
2148 #ifdef XBYAK64
2149 		if (op.isHigh8bit()) XBYAK_THROW(ERR_BAD_COMBINATION)
2150 #endif
2151 		bool cond = reg.isREG() && (reg.getBit() > op.getBit());
2152 		opModRM(reg, op, cond && op.isREG(), cond && op.isMEM(), 0x0F, code | w);
2153 	}
opFpuMem(const Address & addr,uint8_t m16,uint8_t m32,uint8_t m64,uint8_t ext,uint8_t m64ext)2154 	void opFpuMem(const Address& addr, uint8_t m16, uint8_t m32, uint8_t m64, uint8_t ext, uint8_t m64ext)
2155 	{
2156 		if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
2157 		uint8_t code = addr.isBit(16) ? m16 : addr.isBit(32) ? m32 : addr.isBit(64) ? m64 : 0;
2158 		if (!code) XBYAK_THROW(ERR_BAD_MEM_SIZE)
2159 		if (m64ext && addr.isBit(64)) ext = m64ext;
2160 
2161 		rex(addr, st0);
2162 		db(code);
2163 		opAddr(addr, ext);
2164 	}
2165 	// use code1 if reg1 == st0
2166 	// use code2 if reg1 != st0 && reg2 == st0
opFpuFpu(const Fpu & reg1,const Fpu & reg2,uint32_t code1,uint32_t code2)2167 	void opFpuFpu(const Fpu& reg1, const Fpu& reg2, uint32_t code1, uint32_t code2)
2168 	{
2169 		uint32_t code = reg1.getIdx() == 0 ? code1 : reg2.getIdx() == 0 ? code2 : 0;
2170 		if (!code) XBYAK_THROW(ERR_BAD_ST_COMBINATION)
2171 		db(uint8_t(code >> 8));
2172 		db(uint8_t(code | (reg1.getIdx() | reg2.getIdx())));
2173 	}
opFpu(const Fpu & reg,uint8_t code1,uint8_t code2)2174 	void opFpu(const Fpu& reg, uint8_t code1, uint8_t code2)
2175 	{
2176 		db(code1); db(code2 | reg.getIdx());
2177 	}
2178 	void opVex(const Reg& r, const Operand *p1, const Operand& op2, int type, int code, int imm8 = NONE)
2179 	{
2180 		if (op2.isMEM()) {
2181 			const Address& addr = op2.getAddress();
2182 			const RegExp& regExp = addr.getRegExp();
2183 			const Reg& base = regExp.getBase();
2184 			const Reg& index = regExp.getIndex();
2185 			if (BIT == 64 && addr.is32bit()) db(0x67);
2186 			int disp8N = 0;
2187 			bool x = index.isExtIdx();
2188 			if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
2189 				int aaa = addr.getOpmaskIdx();
2190 				if (aaa && !(type & T_M_K)) XBYAK_THROW(ERR_INVALID_OPMASK_WITH_MEMORY)
2191 				bool b = false;
2192 				if (addr.isBroadcast()) {
2193 					if (!(type & (T_B32 | T_B64))) XBYAK_THROW(ERR_INVALID_BROADCAST)
2194 					b = true;
2195 				}
2196 				int VL = regExp.isVsib() ? index.getBit() : 0;
2197 				disp8N = evex(r, base, p1, type, code, x, b, aaa, VL, index.isExtIdx2());
2198 			} else {
2199 				vex(r, base, p1, type, code, x);
2200 			}
2201 			opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N, (type & T_VSIB) != 0);
2202 		} else {
2203 			const Reg& base = op2.getReg();
2204 			if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) {
2205 				evex(r, base, p1, type, code);
2206 			} else {
2207 				vex(r, base, p1, type, code);
2208 			}
2209 			setModRM(3, r.getIdx(), base.getIdx());
2210 		}
2211 		if (imm8 != NONE) db(imm8);
2212 	}
2213 	// (r, r, r/m) if isR_R_RM
2214 	// (r, r/m, r)
2215 	void opGpr(const Reg32e& r, const Operand& op1, const Operand& op2, int type, uint8_t code, bool isR_R_RM, int imm8 = NONE)
2216 	{
2217 		const Operand *p1 = &op1;
2218 		const Operand *p2 = &op2;
2219 		if (!isR_R_RM) std::swap(p1, p2);
2220 		const unsigned int bit = r.getBit();
2221 		if (p1->getBit() != bit || (p2->isREG() && p2->getBit() != bit)) XBYAK_THROW(ERR_BAD_COMBINATION)
2222 		type |= (bit == 64) ? T_W1 : T_W0;
2223 		opVex(r, p1, *p2, type, code, imm8);
2224 	}
2225 	void opAVX_X_X_XM(const Xmm& x1, const Operand& op1, const Operand& op2, int type, int code0, int imm8 = NONE)
2226 	{
2227 		const Xmm *x2 = static_cast<const Xmm*>(&op1);
2228 		const Operand *op = &op2;
2229 		if (op2.isNone()) { // (x1, op1) -> (x1, x1, op1)
2230 			x2 = &x1;
2231 			op = &op1;
2232 		}
2233 		// (x1, x2, op)
2234 		if (!((x1.isXMM() && x2->isXMM()) || ((type & T_YMM) && ((x1.isYMM() && x2->isYMM()) || (x1.isZMM() && x2->isZMM()))))) XBYAK_THROW(ERR_BAD_COMBINATION)
2235 		opVex(x1, x2, *op, type, code0, imm8);
2236 	}
2237 	void opAVX_K_X_XM(const Opmask& k, const Xmm& x2, const Operand& op3, int type, int code0, int imm8 = NONE)
2238 	{
2239 		if (!op3.isMEM() && (x2.getKind() != op3.getKind())) XBYAK_THROW(ERR_BAD_COMBINATION)
2240 		opVex(k, &x2, op3, type, code0, imm8);
2241 	}
2242 	// (x, x/m), (y, x/m256), (z, y/m)
checkCvt1(const Operand & x,const Operand & op)2243 	void checkCvt1(const Operand& x, const Operand& op) const
2244 	{
2245 		if (!op.isMEM() && !(x.is(Operand::XMM | Operand::YMM) && op.isXMM()) && !(x.isZMM() && op.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION)
2246 	}
2247 	// (x, x/m), (x, y/m256), (y, z/m)
checkCvt2(const Xmm & x,const Operand & op)2248 	void checkCvt2(const Xmm& x, const Operand& op) const
2249 	{
2250 		if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION)
2251 	}
opCvt(const Xmm & x,const Operand & op,int type,int code)2252 	void opCvt(const Xmm& x, const Operand& op, int type, int code)
2253 	{
2254 		Operand::Kind kind = x.isXMM() ? (op.isBit(256) ? Operand::YMM : Operand::XMM) : Operand::ZMM;
2255 		opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
2256 	}
opCvt2(const Xmm & x,const Operand & op,int type,int code)2257 	void opCvt2(const Xmm& x, const Operand& op, int type, int code)
2258 	{
2259 		checkCvt2(x, op);
2260 		opCvt(x, op, type, code);
2261 	}
opCvt3(const Xmm & x1,const Xmm & x2,const Operand & op,int type,int type64,int type32,uint8_t code)2262 	void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int type64, int type32, uint8_t code)
2263 	{
2264 		if (!(x1.isXMM() && x2.isXMM() && (op.isREG(i32e) || op.isMEM()))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
2265 		Xmm x(op.getIdx());
2266 		const Operand *p = op.isREG() ? &x : &op;
2267 		opVex(x1, &x2, *p, type | (op.isBit(64) ? type64 : type32), code);
2268 	}
2269 	// (x, x/y/xword/yword), (y, z/m)
checkCvt4(const Xmm & x,const Operand & op)2270 	void checkCvt4(const Xmm& x, const Operand& op) const
2271 	{
2272 		if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM) && op.isBit(128|256)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION)
2273 	}
2274 	// (x, x/y/z/xword/yword/zword)
opCvt5(const Xmm & x,const Operand & op,int type,int code)2275 	void opCvt5(const Xmm& x, const Operand& op, int type, int code)
2276 	{
2277 		if (!(x.isXMM() && op.isBit(128|256|512))) XBYAK_THROW(ERR_BAD_COMBINATION)
2278 		Operand::Kind kind = op.isBit(128) ? Operand::XMM : op.isBit(256) ? Operand::YMM : Operand::ZMM;
2279 		opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
2280 	}
cvtIdx0(const Operand & x)2281 	const Xmm& cvtIdx0(const Operand& x) const
2282 	{
2283 		return x.isZMM() ? zm0 : x.isYMM() ? ym0 : xm0;
2284 	}
2285 	// support (x, x/m, imm), (y, y/m, imm)
2286 	void opAVX_X_XM_IMM(const Xmm& x, const Operand& op, int type, int code, int imm8 = NONE)
2287 	{
2288 		opAVX_X_X_XM(x, cvtIdx0(x), op, type, code, imm8);
2289 	}
2290 	// QQQ:need to refactor
opSp1(const Reg & reg,const Operand & op,uint8_t pref,uint8_t code0,uint8_t code1)2291 	void opSp1(const Reg& reg, const Operand& op, uint8_t pref, uint8_t code0, uint8_t code1)
2292 	{
2293 		if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
2294 		bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM());
2295 		if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION)
2296 		if (is16bit) db(0x66);
2297 		db(pref); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, code0, code1);
2298 	}
opGather(const Xmm & x1,const Address & addr,const Xmm & x2,int type,uint8_t code,int mode)2299 	void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8_t code, int mode)
2300 	{
2301 		const RegExp& regExp = addr.getRegExp();
2302 		if (!regExp.isVsib(128 | 256)) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
2303 		const int y_vx_y = 0;
2304 		const int y_vy_y = 1;
2305 //		const int x_vy_x = 2;
2306 		const bool isAddrYMM = regExp.getIndex().getBit() == 256;
2307 		if (!x1.isXMM() || isAddrYMM || !x2.isXMM()) {
2308 			bool isOK = false;
2309 			if (mode == y_vx_y) {
2310 				isOK = x1.isYMM() && !isAddrYMM && x2.isYMM();
2311 			} else if (mode == y_vy_y) {
2312 				isOK = x1.isYMM() && isAddrYMM && x2.isYMM();
2313 			} else { // x_vy_x
2314 				isOK = !x1.isYMM() && isAddrYMM && !x2.isYMM();
2315 			}
2316 			if (!isOK) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
2317 		}
2318 		int i1 = x1.getIdx();
2319 		int i2 = regExp.getIndex().getIdx();
2320 		int i3 = x2.getIdx();
2321 		if (i1 == i2 || i1 == i3 || i2 == i3) XBYAK_THROW(ERR_SAME_REGS_ARE_INVALID);
2322 		opAVX_X_X_XM(isAddrYMM ? Ymm(i1) : x1, isAddrYMM ? Ymm(i3) : x2, addr, type, code);
2323 	}
2324 	enum {
2325 		xx_yy_zz = 0,
2326 		xx_yx_zy = 1,
2327 		xx_xy_yz = 2
2328 	};
checkGather2(const Xmm & x1,const Reg & x2,int mode)2329 	void checkGather2(const Xmm& x1, const Reg& x2, int mode) const
2330 	{
2331 		if (x1.isXMM() && x2.isXMM()) return;
2332 		switch (mode) {
2333 		case xx_yy_zz: if ((x1.isYMM() && x2.isYMM()) || (x1.isZMM() && x2.isZMM())) return;
2334 			break;
2335 		case xx_yx_zy: if ((x1.isYMM() && x2.isXMM()) || (x1.isZMM() && x2.isYMM())) return;
2336 			break;
2337 		case xx_xy_yz: if ((x1.isXMM() && x2.isYMM()) || (x1.isYMM() && x2.isZMM())) return;
2338 			break;
2339 		}
2340 		XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
2341 	}
opGather2(const Xmm & x,const Address & addr,int type,uint8_t code,int mode)2342 	void opGather2(const Xmm& x, const Address& addr, int type, uint8_t code, int mode)
2343 	{
2344 		if (x.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO)
2345 		const RegExp& regExp = addr.getRegExp();
2346 		checkGather2(x, regExp.getIndex(), mode);
2347 		int maskIdx = x.getOpmaskIdx();
2348 		if ((type & T_M_K) && addr.getOpmaskIdx()) maskIdx = addr.getOpmaskIdx();
2349 		if (maskIdx == 0) XBYAK_THROW(ERR_K0_IS_INVALID);
2350 		if (!(type & T_M_K) && x.getIdx() == regExp.getIndex().getIdx()) XBYAK_THROW(ERR_SAME_REGS_ARE_INVALID);
2351 		opVex(x, 0, addr, type, code);
2352 	}
2353 	/*
2354 		xx_xy_yz ; mode = true
2355 		xx_xy_xz ; mode = false
2356 	*/
opVmov(const Operand & op,const Xmm & x,int type,uint8_t code,bool mode)2357 	void opVmov(const Operand& op, const Xmm& x, int type, uint8_t code, bool mode)
2358 	{
2359 		if (mode) {
2360 			if (!op.isMEM() && !((op.isXMM() && x.isXMM()) || (op.isXMM() && x.isYMM()) || (op.isYMM() && x.isZMM()))) XBYAK_THROW(ERR_BAD_COMBINATION)
2361 		} else {
2362 			if (!op.isMEM() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION)
2363 		}
2364 		opVex(x, 0, op, type, code);
2365 	}
opGatherFetch(const Address & addr,const Xmm & x,int type,uint8_t code,Operand::Kind kind)2366 	void opGatherFetch(const Address& addr, const Xmm& x, int type, uint8_t code, Operand::Kind kind)
2367 	{
2368 		if (addr.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO)
2369 		if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
2370 		opVex(x, 0, addr, type, code);
2371 	}
opVnni(const Xmm & x1,const Xmm & x2,const Operand & op,int type,int code0,PreferredEncoding encoding)2372 	void opVnni(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int code0, PreferredEncoding encoding)
2373 	{
2374 		if (encoding == DefaultEncoding) {
2375 			encoding = EvexEncoding;
2376 		}
2377 		if (encoding == EvexEncoding) {
2378 #ifdef XBYAK_DISABLE_AVX512
2379 			XBYAK_THROW(ERR_EVEX_IS_INVALID)
2380 #endif
2381 			type |= T_MUST_EVEX;
2382 		}
2383 		opAVX_X_X_XM(x1, x2, op, type, code0);
2384 	}
opInOut(const Reg & a,const Reg & d,uint8_t code)2385 	void opInOut(const Reg& a, const Reg& d, uint8_t code)
2386 	{
2387 		if (a.getIdx() == Operand::AL && d.getIdx() == Operand::DX && d.getBit() == 16) {
2388 			switch (a.getBit()) {
2389 			case 8: db(code); return;
2390 			case 16: db(0x66); db(code + 1); return;
2391 			case 32: db(code + 1); return;
2392 			}
2393 		}
2394 		XBYAK_THROW(ERR_BAD_COMBINATION)
2395 	}
opInOut(const Reg & a,uint8_t code,uint8_t v)2396 	void opInOut(const Reg& a, uint8_t code, uint8_t v)
2397 	{
2398 		if (a.getIdx() == Operand::AL) {
2399 			switch (a.getBit()) {
2400 			case 8: db(code); db(v); return;
2401 			case 16: db(0x66); db(code + 1); db(v); return;
2402 			case 32: db(code + 1); db(v); return;
2403 			}
2404 		}
2405 		XBYAK_THROW(ERR_BAD_COMBINATION)
2406 	}
2407 #ifdef XBYAK64
opAMX(const Tmm & t1,const Address & addr,int type,int code0)2408 	void opAMX(const Tmm& t1, const Address& addr, int type, int code0)
2409 	{
2410 		// require both base and index
2411 		const RegExp exp = addr.getRegExp(false);
2412 		if (exp.getBase().getBit() == 0 || exp.getIndex().getBit() == 0) XBYAK_THROW(ERR_NOT_SUPPORTED)
2413 		opVex(t1, &tmm0, addr, type, code0);
2414 	}
2415 #endif
2416 public:
getVersion()2417 	unsigned int getVersion() const { return VERSION; }
2418 	using CodeArray::db;
2419 	const Mmx mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
2420 	const Xmm xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
2421 	const Ymm ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7;
2422 	const Zmm zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7;
2423 	const Xmm &xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7;
2424 	const Ymm &ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7;
2425 	const Zmm &zm0, &zm1, &zm2, &zm3, &zm4, &zm5, &zm6, &zm7;
2426 	const Reg32 eax, ecx, edx, ebx, esp, ebp, esi, edi;
2427 	const Reg16 ax, cx, dx, bx, sp, bp, si, di;
2428 	const Reg8 al, cl, dl, bl, ah, ch, dh, bh;
2429 	const AddressFrame ptr, byte, word, dword, qword, xword, yword, zword; // xword is same as oword of NASM
2430 	const AddressFrame ptr_b, xword_b, yword_b, zword_b; // broadcast such as {1to2}, {1to4}, {1to8}, {1to16}, {b}
2431 	const Fpu st0, st1, st2, st3, st4, st5, st6, st7;
2432 	const Opmask k0, k1, k2, k3, k4, k5, k6, k7;
2433 	const BoundsReg bnd0, bnd1, bnd2, bnd3;
2434 	const EvexModifierRounding T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_sae; // {sae}, {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae}
2435 	const EvexModifierZero T_z; // {z}
2436 #ifdef XBYAK64
2437 	const Reg64 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
2438 	const Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d;
2439 	const Reg16 r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w;
2440 	const Reg8 r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b;
2441 	const Reg8 spl, bpl, sil, dil;
2442 	const Xmm xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
2443 	const Xmm xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23;
2444 	const Xmm xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31;
2445 	const Ymm ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15;
2446 	const Ymm ymm16, ymm17, ymm18, ymm19, ymm20, ymm21, ymm22, ymm23;
2447 	const Ymm ymm24, ymm25, ymm26, ymm27, ymm28, ymm29, ymm30, ymm31;
2448 	const Zmm zmm8, zmm9, zmm10, zmm11, zmm12, zmm13, zmm14, zmm15;
2449 	const Zmm zmm16, zmm17, zmm18, zmm19, zmm20, zmm21, zmm22, zmm23;
2450 	const Zmm zmm24, zmm25, zmm26, zmm27, zmm28, zmm29, zmm30, zmm31;
2451 	const Tmm tmm0, tmm1, tmm2, tmm3, tmm4, tmm5, tmm6, tmm7;
2452 	const Xmm &xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15; // for my convenience
2453 	const Xmm &xm16, &xm17, &xm18, &xm19, &xm20, &xm21, &xm22, &xm23;
2454 	const Xmm &xm24, &xm25, &xm26, &xm27, &xm28, &xm29, &xm30, &xm31;
2455 	const Ymm &ym8, &ym9, &ym10, &ym11, &ym12, &ym13, &ym14, &ym15;
2456 	const Ymm &ym16, &ym17, &ym18, &ym19, &ym20, &ym21, &ym22, &ym23;
2457 	const Ymm &ym24, &ym25, &ym26, &ym27, &ym28, &ym29, &ym30, &ym31;
2458 	const Zmm &zm8, &zm9, &zm10, &zm11, &zm12, &zm13, &zm14, &zm15;
2459 	const Zmm &zm16, &zm17, &zm18, &zm19, &zm20, &zm21, &zm22, &zm23;
2460 	const Zmm &zm24, &zm25, &zm26, &zm27, &zm28, &zm29, &zm30, &zm31;
2461 	const RegRip rip;
2462 #endif
2463 #ifndef XBYAK_DISABLE_SEGMENT
2464 	const Segment es, cs, ss, ds, fs, gs;
2465 #endif
2466 private:
2467 	bool isDefaultJmpNEAR_;
2468 public:
L(const std::string & label)2469 	void L(const std::string& label) { labelMgr_.defineSlabel(label); }
L(Label & label)2470 	void L(Label& label) { labelMgr_.defineClabel(label); }
L()2471 	Label L() { Label label; L(label); return label; }
inLocalLabel()2472 	void inLocalLabel() { labelMgr_.enterLocal(); }
outLocalLabel()2473 	void outLocalLabel() { labelMgr_.leaveLocal(); }
2474 	/*
2475 		assign src to dst
2476 		require
2477 		dst : does not used by L()
2478 		src : used by L()
2479 	*/
assignL(Label & dst,const Label & src)2480 	void assignL(Label& dst, const Label& src) { labelMgr_.assign(dst, src); }
2481 	/*
2482 		put address of label to buffer
2483 		@note the put size is 4(32-bit), 8(64-bit)
2484 	*/
putL(std::string label)2485 	void putL(std::string label) { putL_inner(label); }
putL(const Label & label)2486 	void putL(const Label& label) { putL_inner(label); }
2487 
2488 	// set default type of `jmp` of undefined label to T_NEAR
setDefaultJmpNEAR(bool isNear)2489 	void setDefaultJmpNEAR(bool isNear) { isDefaultJmpNEAR_ = isNear; }
2490 	void jmp(const Operand& op, LabelType type = T_AUTO) { opJmpOp(op, type, 4); }
2491 	void jmp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
2492 	void jmp(const char *label, LabelType type = T_AUTO) { jmp(std::string(label), type); }
2493 	void jmp(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
2494 	void jmp(const void *addr, LabelType type = T_AUTO) { opJmpAbs(addr, type, 0xEB, 0xE9); }
2495 
2496 	void call(const Operand& op, LabelType type = T_AUTO) { opJmpOp(op, type, 2); }
2497 	// call(string label), not const std::string&
call(std::string label)2498 	void call(std::string label) { opJmp(label, T_NEAR, 0, 0xE8, 0); }
call(const char * label)2499 	void call(const char *label) { call(std::string(label)); }
call(const Label & label)2500 	void call(const Label& label) { opJmp(label, T_NEAR, 0, 0xE8, 0); }
2501 	// call(function pointer)
2502 #ifdef XBYAK_VARIADIC_TEMPLATE
2503 	template<class Ret, class... Params>
call(Ret (* func)(Params...))2504 	void call(Ret(*func)(Params...)) { call(reinterpret_cast<const void*>(func)); }
2505 #endif
call(const void * addr)2506 	void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, 0xE8); }
2507 
test(const Operand & op,const Reg & reg)2508 	void test(const Operand& op, const Reg& reg)
2509 	{
2510 		opModRM(reg, op, op.isREG() && (op.getKind() == reg.getKind()), op.isMEM(), 0x84);
2511 	}
test(const Operand & op,uint32_t imm)2512 	void test(const Operand& op, uint32_t imm)
2513 	{
2514 		verifyMemHasSize(op);
2515         int immSize = (std::min)(op.getBit() / 8, 4U);
2516 		if (op.isREG() && op.getIdx() == 0) { // al, ax, eax
2517 			rex(op);
2518 			db(0xA8 | (op.isBit(8) ? 0 : 1));
2519 		} else {
2520 			opR_ModM(op, 0, 0, 0xF6, NONE, NONE, false, immSize);
2521 		}
2522 		db(imm, immSize);
2523 	}
imul(const Reg & reg,const Operand & op)2524 	void imul(const Reg& reg, const Operand& op)
2525 	{
2526 		opModRM(reg, op, op.isREG() && (reg.getKind() == op.getKind()), op.isMEM(), 0x0F, 0xAF);
2527 	}
imul(const Reg & reg,const Operand & op,int imm)2528 	void imul(const Reg& reg, const Operand& op, int imm)
2529 	{
2530 		int s = inner::IsInDisp8(imm) ? 1 : 0;
2531         int immSize = s ? 1 : reg.isREG(16) ? 2 : 4;
2532 		opModRM(reg, op, op.isREG() && (reg.getKind() == op.getKind()), op.isMEM(), 0x69 | (s << 1), NONE, NONE, immSize);
2533 		db(imm, immSize);
2534 	}
push(const Operand & op)2535 	void push(const Operand& op) { opPushPop(op, 0xFF, 6, 0x50); }
pop(const Operand & op)2536 	void pop(const Operand& op) { opPushPop(op, 0x8F, 0, 0x58); }
push(const AddressFrame & af,uint32_t imm)2537 	void push(const AddressFrame& af, uint32_t imm)
2538 	{
2539 		if (af.bit_ == 8) {
2540 			db(0x6A); db(imm);
2541 		} else if (af.bit_ == 16) {
2542 			db(0x66); db(0x68); dw(imm);
2543 		} else {
2544 			db(0x68); dd(imm);
2545 		}
2546 	}
2547 	/* use "push(word, 4)" if you want "push word 4" */
push(uint32_t imm)2548 	void push(uint32_t imm)
2549 	{
2550 		if (inner::IsInDisp8(imm)) {
2551 			push(byte, imm);
2552 		} else {
2553 			push(dword, imm);
2554 		}
2555 	}
mov(const Operand & reg1,const Operand & reg2)2556 	void mov(const Operand& reg1, const Operand& reg2)
2557 	{
2558 		const Reg *reg = 0;
2559 		const Address *addr = 0;
2560 		uint8_t code = 0;
2561 		if (reg1.isREG() && reg1.getIdx() == 0 && reg2.isMEM()) { // mov eax|ax|al, [disp]
2562 			reg = &reg1.getReg();
2563 			addr= &reg2.getAddress();
2564 			code = 0xA0;
2565 		} else
2566 		if (reg1.isMEM() && reg2.isREG() && reg2.getIdx() == 0) { // mov [disp], eax|ax|al
2567 			reg = &reg2.getReg();
2568 			addr= &reg1.getAddress();
2569 			code = 0xA2;
2570 		}
2571 #ifdef XBYAK64
2572 		if (addr && addr->is64bitDisp()) {
2573 			if (code) {
2574 				rex(*reg);
2575 				db(reg1.isREG(8) ? 0xA0 : reg1.isREG() ? 0xA1 : reg2.isREG(8) ? 0xA2 : 0xA3);
2576 				db(addr->getDisp(), 8);
2577 			} else {
2578 				XBYAK_THROW(ERR_BAD_COMBINATION)
2579 			}
2580 		} else
2581 #else
2582 		if (code && addr->isOnlyDisp()) {
2583 			rex(*reg, *addr);
2584 			db(code | (reg->isBit(8) ? 0 : 1));
2585 			dd(static_cast<uint32_t>(addr->getDisp()));
2586 		} else
2587 #endif
2588 		{
2589 			opRM_RM(reg1, reg2, 0x88);
2590 		}
2591 	}
mov(const Operand & op,uint64_t imm)2592 	void mov(const Operand& op, uint64_t imm)
2593 	{
2594 		if (op.isREG()) {
2595 			const int size = mov_imm(op.getReg(), imm);
2596 			db(imm, size);
2597 		} else if (op.isMEM()) {
2598 			verifyMemHasSize(op);
2599 			int immSize = op.getBit() / 8;
2600 			if (immSize <= 4) {
2601 				int64_t s = int64_t(imm) >> (immSize * 8);
2602 				if (s != 0 && s != -1) XBYAK_THROW(ERR_IMM_IS_TOO_BIG)
2603 			} else {
2604 				if (!inner::IsInInt32(imm)) XBYAK_THROW(ERR_IMM_IS_TOO_BIG)
2605 				immSize = 4;
2606 			}
2607 			opModM(op.getAddress(), Reg(0, Operand::REG, op.getBit()), 0xC6, NONE, NONE, immSize);
2608 			db(static_cast<uint32_t>(imm), immSize);
2609 		} else {
2610 			XBYAK_THROW(ERR_BAD_COMBINATION)
2611 		}
2612 	}
2613 
2614 	// The template is used to avoid ambiguity when the 2nd argument is 0.
2615 	// When the 2nd argument is 0 the call goes to
2616 	// `void mov(const Operand& op, uint64_t imm)`.
2617 	template <typename T1, typename T2>
mov(const T1 &,const T2 *)2618 	void mov(const T1&, const T2 *) { T1::unexpected; }
mov(const NativeReg & reg,const Label & label)2619 	void mov(const NativeReg& reg, const Label& label)
2620 	{
2621 		mov_imm(reg, dummyAddr);
2622 		putL(label);
2623 	}
xchg(const Operand & op1,const Operand & op2)2624 	void xchg(const Operand& op1, const Operand& op2)
2625 	{
2626 		const Operand *p1 = &op1, *p2 = &op2;
2627 		if (p1->isMEM() || (p2->isREG(16 | i32e) && p2->getIdx() == 0)) {
2628 			p1 = &op2; p2 = &op1;
2629 		}
2630 		if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION)
2631 		if (p2->isREG() && (p1->isREG(16 | i32e) && p1->getIdx() == 0)
2632 #ifdef XBYAK64
2633 			&& (p2->getIdx() != 0 || !p1->isREG(32))
2634 #endif
2635 		) {
2636 			rex(*p2, *p1); db(0x90 | (p2->getIdx() & 7));
2637 			return;
2638 		}
2639 		opModRM(*p1, *p2, (p1->isREG() && p2->isREG() && (p1->getBit() == p2->getBit())), p2->isMEM(), 0x86 | (p1->isBit(8) ? 0 : 1));
2640 	}
2641 
2642 #ifndef XBYAK_DISABLE_SEGMENT
push(const Segment & seg)2643 	void push(const Segment& seg)
2644 	{
2645 		switch (seg.getIdx()) {
2646 		case Segment::es: db(0x06); break;
2647 		case Segment::cs: db(0x0E); break;
2648 		case Segment::ss: db(0x16); break;
2649 		case Segment::ds: db(0x1E); break;
2650 		case Segment::fs: db(0x0F); db(0xA0); break;
2651 		case Segment::gs: db(0x0F); db(0xA8); break;
2652 		default:
2653 			assert(0);
2654 		}
2655 	}
pop(const Segment & seg)2656 	void pop(const Segment& seg)
2657 	{
2658 		switch (seg.getIdx()) {
2659 		case Segment::es: db(0x07); break;
2660 		case Segment::cs: XBYAK_THROW(ERR_BAD_COMBINATION)
2661 		case Segment::ss: db(0x17); break;
2662 		case Segment::ds: db(0x1F); break;
2663 		case Segment::fs: db(0x0F); db(0xA1); break;
2664 		case Segment::gs: db(0x0F); db(0xA9); break;
2665 		default:
2666 			assert(0);
2667 		}
2668 	}
putSeg(const Segment & seg)2669 	void putSeg(const Segment& seg)
2670 	{
2671 		switch (seg.getIdx()) {
2672 		case Segment::es: db(0x2E); break;
2673 		case Segment::cs: db(0x36); break;
2674 		case Segment::ss: db(0x3E); break;
2675 		case Segment::ds: db(0x26); break;
2676 		case Segment::fs: db(0x64); break;
2677 		case Segment::gs: db(0x65); break;
2678 		default:
2679 			assert(0);
2680 		}
2681 	}
mov(const Operand & op,const Segment & seg)2682 	void mov(const Operand& op, const Segment& seg)
2683 	{
2684 		opModRM(Reg8(seg.getIdx()), op, op.isREG(16|i32e), op.isMEM(), 0x8C);
2685 	}
mov(const Segment & seg,const Operand & op)2686 	void mov(const Segment& seg, const Operand& op)
2687 	{
2688 		opModRM(Reg8(seg.getIdx()), op.isREG(16|i32e) ? static_cast<const Operand&>(op.getReg().cvt32()) : op, op.isREG(16|i32e), op.isMEM(), 0x8E);
2689 	}
2690 #endif
2691 
2692 	enum { NONE = 256 };
2693 	// constructor
2694 	CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0, Allocator *allocator = 0)
CodeArray(maxSize,userPtr,allocator)2695 		: CodeArray(maxSize, userPtr, allocator)
2696 		, mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7)
2697 		, xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7)
2698 		, ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7)
2699 		, zmm0(0), zmm1(1), zmm2(2), zmm3(3), zmm4(4), zmm5(5), zmm6(6), zmm7(7)
2700 		// for my convenience
2701 		, xm0(xmm0), xm1(xmm1), xm2(xmm2), xm3(xmm3), xm4(xmm4), xm5(xmm5), xm6(xmm6), xm7(xmm7)
2702 		, ym0(ymm0), ym1(ymm1), ym2(ymm2), ym3(ymm3), ym4(ymm4), ym5(ymm5), ym6(ymm6), ym7(ymm7)
2703 		, zm0(zmm0), zm1(zmm1), zm2(zmm2), zm3(zmm3), zm4(zmm4), zm5(zmm5), zm6(zmm6), zm7(zmm7)
2704 
2705 		, eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI)
2706 		, ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI)
2707 		, al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH)
2708 		, ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512)
2709 		, ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true)
2710 		, st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7)
2711 		, k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7)
2712 		, bnd0(0), bnd1(1), bnd2(2), bnd3(3)
2713 		, T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE)
2714 		, T_z()
2715 #ifdef XBYAK64
2716 		, rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15)
2717 		, r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15)
2718 		, r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15)
2719 		, r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15)
2720 		, spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true)
2721 		, xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15)
2722 		, xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23)
2723 		, xmm24(24), xmm25(25), xmm26(26), xmm27(27), xmm28(28), xmm29(29), xmm30(30), xmm31(31)
2724 		, ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15)
2725 		, ymm16(16), ymm17(17), ymm18(18), ymm19(19), ymm20(20), ymm21(21), ymm22(22), ymm23(23)
2726 		, ymm24(24), ymm25(25), ymm26(26), ymm27(27), ymm28(28), ymm29(29), ymm30(30), ymm31(31)
2727 		, zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15)
2728 		, zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23)
2729 		, zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31)
2730 		, tmm0(0), tmm1(1), tmm2(2), tmm3(3), tmm4(4), tmm5(5), tmm6(6), tmm7(7)
2731 		// for my convenience
2732 		, xm8(xmm8), xm9(xmm9), xm10(xmm10), xm11(xmm11), xm12(xmm12), xm13(xmm13), xm14(xmm14), xm15(xmm15)
2733 		, xm16(xmm16), xm17(xmm17), xm18(xmm18), xm19(xmm19), xm20(xmm20), xm21(xmm21), xm22(xmm22), xm23(xmm23)
2734 		, xm24(xmm24), xm25(xmm25), xm26(xmm26), xm27(xmm27), xm28(xmm28), xm29(xmm29), xm30(xmm30), xm31(xmm31)
2735 		, ym8(ymm8), ym9(ymm9), ym10(ymm10), ym11(ymm11), ym12(ymm12), ym13(ymm13), ym14(ymm14), ym15(ymm15)
2736 		, ym16(ymm16), ym17(ymm17), ym18(ymm18), ym19(ymm19), ym20(ymm20), ym21(ymm21), ym22(ymm22), ym23(ymm23)
2737 		, ym24(ymm24), ym25(ymm25), ym26(ymm26), ym27(ymm27), ym28(ymm28), ym29(ymm29), ym30(ymm30), ym31(ymm31)
2738 		, zm8(zmm8), zm9(zmm9), zm10(zmm10), zm11(zmm11), zm12(zmm12), zm13(zmm13), zm14(zmm14), zm15(zmm15)
2739 		, zm16(zmm16), zm17(zmm17), zm18(zmm18), zm19(zmm19), zm20(zmm20), zm21(zmm21), zm22(zmm22), zm23(zmm23)
2740 		, zm24(zmm24), zm25(zmm25), zm26(zmm26), zm27(zmm27), zm28(zmm28), zm29(zmm29), zm30(zmm30), zm31(zmm31)
2741 		, rip()
2742 #endif
2743 #ifndef XBYAK_DISABLE_SEGMENT
2744 		, es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs)
2745 #endif
2746 		, isDefaultJmpNEAR_(false)
2747 	{
2748 		labelMgr_.set(this);
2749 	}
reset()2750 	void reset()
2751 	{
2752 		resetSize();
2753 		labelMgr_.reset();
2754 		labelMgr_.set(this);
2755 	}
hasUndefinedLabel()2756 	bool hasUndefinedLabel() const { return labelMgr_.hasUndefSlabel() || labelMgr_.hasUndefClabel(); }
2757 	/*
2758 		MUST call ready() to complete generating code if you use AutoGrow mode.
2759 		It is not necessary for the other mode if hasUndefinedLabel() is true.
2760 	*/
2761 	void ready(ProtectMode mode = PROTECT_RWE)
2762 	{
2763 		if (hasUndefinedLabel()) XBYAK_THROW(ERR_LABEL_IS_NOT_FOUND)
2764 		if (isAutoGrow()) {
2765 			calcJmpAddress();
2766 			if (useProtect()) setProtectMode(mode);
2767 		}
2768 	}
2769 	// set read/exec
readyRE()2770 	void readyRE() { return ready(PROTECT_RE); }
2771 #ifdef XBYAK_TEST
2772 	void dump(bool doClear = true)
2773 	{
2774 		CodeArray::dump();
2775 		if (doClear) size_ = 0;
2776 	}
2777 #endif
2778 
2779 #ifdef XBYAK_UNDEF_JNL
2780 	#undef jnl
2781 #endif
2782 
2783 	/*
2784 		use single byte nop if useMultiByteNop = false
2785 	*/
2786 	void nop(size_t size = 1, bool useMultiByteNop = true)
2787 	{
2788 		if (!useMultiByteNop) {
2789 			for (size_t i = 0; i < size; i++) {
2790 				db(0x90);
2791 			}
2792 			return;
2793 		}
2794 		/*
2795 			Intel Architectures Software Developer's Manual Volume 2
2796 			recommended multi-byte sequence of NOP instruction
2797 			AMD and Intel seem to agree on the same sequences for up to 9 bytes:
2798 			https://support.amd.com/TechDocs/55723_SOG_Fam_17h_Processors_3.00.pdf
2799 		*/
2800 		static const uint8_t nopTbl[9][9] = {
2801 			{0x90},
2802 			{0x66, 0x90},
2803 			{0x0F, 0x1F, 0x00},
2804 			{0x0F, 0x1F, 0x40, 0x00},
2805 			{0x0F, 0x1F, 0x44, 0x00, 0x00},
2806 			{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
2807 			{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
2808 			{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
2809 			{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
2810 		};
2811 		const size_t n = sizeof(nopTbl) / sizeof(nopTbl[0]);
2812 		while (size > 0) {
2813 			size_t len = (std::min)(n, size);
2814 			const uint8_t *seq = nopTbl[len - 1];
2815 			db(seq, len);
2816 			size -= len;
2817 		}
2818 	}
2819 
2820 #ifndef XBYAK_DONT_READ_LIST
2821 #include "xbyak_mnemonic.h"
2822 	/*
2823 		use single byte nop if useMultiByteNop = false
2824 	*/
2825 	void align(size_t x = 16, bool useMultiByteNop = true)
2826 	{
2827 		if (x == 1) return;
2828 		if (x < 1 || (x & (x - 1))) XBYAK_THROW(ERR_BAD_ALIGN)
2829 		if (isAutoGrow() && x > inner::ALIGN_PAGE_SIZE) fprintf(stderr, "warning:autoGrow mode does not support %d align\n", (int)x);
2830 		size_t remain = size_t(getCurr()) % x;
2831 		if (remain) {
2832 			nop(x - remain, useMultiByteNop);
2833 		}
2834 	}
2835 #endif
2836 };
2837 
2838 template <>
mov(const NativeReg & reg,const char * label)2839 inline void CodeGenerator::mov(const NativeReg& reg, const char *label) // can't use std::string
2840 {
2841 	assert(label);
2842 	mov_imm(reg, dummyAddr);
2843 	putL(label);
2844 }
2845 
2846 namespace util {
2847 static const XBYAK_CONSTEXPR Mmx mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7);
2848 static const XBYAK_CONSTEXPR Xmm xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7);
2849 static const XBYAK_CONSTEXPR Ymm ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7);
2850 static const XBYAK_CONSTEXPR Zmm zmm0(0), zmm1(1), zmm2(2), zmm3(3), zmm4(4), zmm5(5), zmm6(6), zmm7(7);
2851 static const XBYAK_CONSTEXPR Reg32 eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI);
2852 static const XBYAK_CONSTEXPR Reg16 ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI);
2853 static const XBYAK_CONSTEXPR Reg8 al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH);
2854 static const XBYAK_CONSTEXPR AddressFrame ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512);
2855 static const XBYAK_CONSTEXPR AddressFrame ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true);
2856 static const XBYAK_CONSTEXPR Fpu st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7);
2857 static const XBYAK_CONSTEXPR Opmask k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7);
2858 static const XBYAK_CONSTEXPR BoundsReg bnd0(0), bnd1(1), bnd2(2), bnd3(3);
2859 static const XBYAK_CONSTEXPR EvexModifierRounding T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE);
2860 static const XBYAK_CONSTEXPR EvexModifierZero T_z;
2861 #ifdef XBYAK64
2862 static const XBYAK_CONSTEXPR Reg64 rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15);
2863 static const XBYAK_CONSTEXPR Reg32 r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15);
2864 static const XBYAK_CONSTEXPR Reg16 r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15);
2865 static const XBYAK_CONSTEXPR Reg8 r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15), spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true);
2866 static const XBYAK_CONSTEXPR Xmm xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15);
2867 static const XBYAK_CONSTEXPR Xmm xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23);
2868 static const XBYAK_CONSTEXPR Xmm xmm24(24), xmm25(25), xmm26(26), xmm27(27), xmm28(28), xmm29(29), xmm30(30), xmm31(31);
2869 static const XBYAK_CONSTEXPR Ymm ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15);
2870 static const XBYAK_CONSTEXPR Ymm ymm16(16), ymm17(17), ymm18(18), ymm19(19), ymm20(20), ymm21(21), ymm22(22), ymm23(23);
2871 static const XBYAK_CONSTEXPR Ymm ymm24(24), ymm25(25), ymm26(26), ymm27(27), ymm28(28), ymm29(29), ymm30(30), ymm31(31);
2872 static const XBYAK_CONSTEXPR Zmm zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15);
2873 static const XBYAK_CONSTEXPR Zmm zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23);
2874 static const XBYAK_CONSTEXPR Zmm zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31);
2875 static const XBYAK_CONSTEXPR Zmm tmm0(0), tmm1(1), tmm2(2), tmm3(3), tmm4(4), tmm5(5), tmm6(6), tmm7(7);
2876 static const XBYAK_CONSTEXPR RegRip rip;
2877 #endif
2878 #ifndef XBYAK_DISABLE_SEGMENT
2879 static const XBYAK_CONSTEXPR Segment es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs);
2880 #endif
2881 } // util
2882 
2883 #ifdef _MSC_VER
2884 	#pragma warning(pop)
2885 #endif
2886 
2887 } // end of namespace
2888 
2889 #endif // XBYAK_XBYAK_H_
2890