1 #define XBYAK_DONT_READ_LIST
2 #include <stdio.h>
3 #include <string.h>
4 #include "xbyak/xbyak.h"
5 #define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
6
7 using namespace Xbyak;
8 #ifdef _MSC_VER
9 #pragma warning(disable : 4996) // scanf
10 #define snprintf _snprintf_s
11 #endif
12
13 #include "avx_type.hpp"
/*
	print the two generated j<reg>z overloads, one taking "std::string label"
	and one taking "const Label& label"
	reg    : register name spliced into the mnemonic (cx/ecx/rcx)
	prefix : when true, each generated body starts with "db(0x67); "
*/
void put_jREGz(const char *reg, bool prefix)
{
	// statement placed in front of opJmp in the generated body (may be empty)
	const char *const prefixStmt = prefix ? "db(0x67); " : "";
	// parameter lists of the generated overloads, in output order
	static const char *const labelParams[] = {
		"std::string label",
		"const Label& label",
	};
	const size_t overloadNum = sizeof(labelParams) / sizeof(labelParams[0]);
	for (size_t k = 0; k < overloadNum; k++) {
		printf("void j%sz(%s) { %sopJmp(label, T_SHORT, 0xe3, 0, 0); }\n", reg, labelParams[k], prefixStmt);
	}
}
23
/*
	one row of a table handed to putGeneric()
	rows are brace-initialized positionally, so the member order must stay
	name, code1, code2, code3, code4; codes left out of an initializer are 0
*/
struct GenericTbl {
	const char *name; // printed as the name of the generated function
	uint8_t code1, code2, code3, code4; // bytes printed as db(..), in this order
};
31
putGeneric(const GenericTbl * p,size_t n)32 void putGeneric(const GenericTbl *p, size_t n)
33 {
34 for (size_t i = 0; i < n; i++) {
35 printf("void %s() { db(0x%02X); ", p->name, p->code1);
36 if (p->code2) printf("db(0x%02X); ", p->code2);
37 if (p->code3) printf("db(0x%02X); ", p->code3);
38 if (p->code4) printf("db(0x%02X); ", p->code4);
39 printf("}\n");
40 p++;
41 }
42 }
43
putX_X_XM(bool omitOnly)44 void putX_X_XM(bool omitOnly)
45 {
46 // (x, x, x/m[, imm]) or (y, y, y/m[, imm])
47 {
48 const struct Tbl {
49 uint8_t code;
50 const char *name;
51 int type;
52 bool hasIMM;
53 bool enableOmit;
54 int mode; // 1 : sse, 2 : avx, 3 : sse + avx
55 } tbl[] = {
56 { 0x0D, "blendpd", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
57 { 0x0C, "blendps", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
58 { 0x41, "dppd", T_0F3A | T_66 | T_W0, true, true, 3 },
59 { 0x40, "dpps", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
60 { 0x42, "mpsadbw", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
61 { 0x0E, "pblendw", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
62 { 0x02, "pblendd", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 2 },
63 { 0x0B, "roundsd", T_0F3A | T_66 | T_W0, true, true, 3 },
64 { 0x0A, "roundss", T_0F3A | T_66 | T_W0, true, true, 3 },
65 { 0x44, "pclmulqdq", T_0F3A | T_66 | T_W0 | T_YMM | T_EVEX, true, true, 3 },
66 { 0x0C, "permilps", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 },
67 { 0x0D, "permilpd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false, 2 },
68
69 { 0x47, "psllvd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 },
70 { 0x47, "psllvq", T_0F38 | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false, 2 },
71 { 0x46, "psravd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 },
72 { 0x45, "psrlvd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 },
73 { 0x45, "psrlvq", T_0F38 | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false, 2 },
74
75 { 0xC2, "cmppd", T_0F | T_66 | T_YMM, true, true, 2 },
76 { 0xC2, "cmpps", T_0F | T_YMM, true, true, 2 },
77 { 0xC2, "cmpsd", T_0F | T_F2, true, true, 2 },
78 { 0xC2, "cmpss", T_0F | T_F3, true, true, 2 },
79 { 0x5A, "cvtsd2ss", T_0F | T_F2 | T_EVEX | T_EW1 | T_N8 | T_ER_X, false, true, 2 },
80 { 0x5A, "cvtss2sd", T_0F | T_F3 | T_EVEX | T_EW0 | T_N4 | T_SAE_X, false, true, 2 },
81 { 0x21, "insertps", T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, true, true, 2 },
82 { 0x63, "packsswb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
83 { 0x6B, "packssdw", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
84 { 0x67, "packuswb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
85 { 0x2B, "packusdw", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 },
86
87 { 0xFC, "paddb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
88 { 0xFD, "paddw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
89 { 0xFE, "paddd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
90 { 0xD4, "paddq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 },
91
92 { 0xEC, "paddsb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
93 { 0xED, "paddsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
94
95 { 0xDC, "paddusb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
96 { 0xDD, "paddusw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
97
98 { 0x0F, "palignr", T_0F3A | T_66 | T_YMM | T_EVEX, true, true, 2 },
99
100 { 0xDB, "pand", T_0F | T_66 | T_YMM, false, true, 2 },
101 { 0xDF, "pandn", T_0F | T_66 | T_YMM, false, true, 2 },
102
103 { 0xE0, "pavgb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
104 { 0xE3, "pavgw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
105
106 { 0x74, "pcmpeqb", T_0F | T_66 | T_YMM, false, true, 2 },
107 { 0x75, "pcmpeqw", T_0F | T_66 | T_YMM, false, true, 2 },
108 { 0x76, "pcmpeqd", T_0F | T_66 | T_YMM, false, true, 2 },
109 { 0x29, "pcmpeqq", T_0F38 | T_66 | T_YMM, false, true, 3 },
110
111 { 0x64, "pcmpgtb", T_0F | T_66 | T_YMM, false, true, 2 },
112 { 0x65, "pcmpgtw", T_0F | T_66 | T_YMM, false, true, 2 },
113 { 0x66, "pcmpgtd", T_0F | T_66 | T_YMM, false, true, 2 },
114 { 0x37, "pcmpgtq", T_0F38 | T_66 | T_YMM, false, true, 3 },
115
116 { 0x01, "phaddw", T_0F38 | T_66 | T_YMM, false, true, 2 },
117 { 0x02, "phaddd", T_0F38 | T_66 | T_YMM, false, true, 2 },
118 { 0x03, "phaddsw", T_0F38 | T_66 | T_YMM, false, true, 2 },
119
120 { 0x05, "phsubw", T_0F38 | T_66 | T_YMM, false, true, 2 },
121 { 0x06, "phsubd", T_0F38 | T_66 | T_YMM, false, true, 2 },
122 { 0x07, "phsubsw", T_0F38 | T_66 | T_YMM, false, true, 2 },
123 { 0xF5, "pmaddwd", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
124 { 0x04, "pmaddubsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 2 },
125
126 { 0x3C, "pmaxsb", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 },
127 { 0xEE, "pmaxsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
128 { 0x3D, "pmaxsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 },
129
130 { 0xDE, "pmaxub", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
131 { 0x3E, "pmaxuw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 },
132 { 0x3F, "pmaxud", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 },
133
134 { 0x38, "pminsb", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 },
135 { 0xEA, "pminsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
136 { 0x39, "pminsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 },
137
138 { 0xDA, "pminub", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
139 { 0x3A, "pminuw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 },
140 { 0x3B, "pminud", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 },
141
142 { 0xE4, "pmulhuw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
143 { 0x0B, "pmulhrsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 2 },
144 { 0xE5, "pmulhw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
145 { 0xD5, "pmullw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
146 { 0x40, "pmulld", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 },
147
148 { 0xF4, "pmuludq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 },
149 { 0x28, "pmuldq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 3 },
150
151 { 0xEB, "por", T_0F | T_66 | T_YMM, false, true, 2 },
152 { 0xF6, "psadbw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
153
154 { 0x00, "pshufb", T_0F38 | T_66 | T_YMM | T_EVEX, false, false, 2 },
155
156 { 0x08, "psignb", T_0F38 | T_66 | T_YMM, false, true, 2 },
157 { 0x09, "psignw", T_0F38 | T_66 | T_YMM, false, true, 2 },
158 { 0x0A, "psignd", T_0F38 | T_66 | T_YMM, false, true, 2 },
159
160 { 0xF1, "psllw", T_0F | T_66 | T_YMM | T_EVEX | T_N16, false, true, 2 },
161 { 0xF2, "pslld", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16, false, true, 2 },
162 { 0xF3, "psllq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16, false, true, 2 },
163
164 { 0xE1, "psraw", T_0F | T_66 | T_YMM | T_EVEX | T_N16, false, true, 2 },
165 { 0xE2, "psrad", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16, false, true, 2 },
166 { 0xD1, "psrlw", T_0F | T_66 | T_YMM | T_EVEX | T_N16, false, true, 2 },
167 { 0xD2, "psrld", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16, false, true, 2 },
168 { 0xD3, "psrlq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16, false, true, 2 },
169
170 { 0xF8, "psubb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
171 { 0xF9, "psubw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
172 { 0xFA, "psubd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
173 { 0xFB, "psubq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 },
174
175 { 0xE8, "psubsb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
176 { 0xE9, "psubsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
177
178 { 0xD8, "psubusb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
179 { 0xD9, "psubusw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
180
181 { 0x68, "punpckhbw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
182 { 0x69, "punpckhwd", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
183 { 0x6A, "punpckhdq", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
184 { 0x6D, "punpckhqdq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 },
185
186 { 0x60, "punpcklbw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
187 { 0x61, "punpcklwd", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
188 { 0x62, "punpckldq", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
189 { 0x6C, "punpcklqdq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 },
190
191 { 0xEF, "pxor", T_0F | T_66 | T_YMM, false, true, 2 },
192
193 { 0x53, "rcpss", T_0F | T_F3, false, true, 2 },
194 { 0x52, "rsqrtss", T_0F | T_F3, false, true, 2 },
195
196 { 0xC6, "shufpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, true, true, 2 },
197 { 0xC6, "shufps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, true, true, 2 },
198
199 { 0x51, "sqrtsd", T_0F | T_F2 | T_EVEX | T_EW1 | T_ER_X | T_N8, false, true, 2 },
200 { 0x51, "sqrtss", T_0F | T_F3 | T_EVEX | T_EW0 | T_ER_X | T_N4, false, true, 2 },
201
202 { 0x15, "unpckhpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 },
203 { 0x15, "unpckhps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
204
205 { 0x14, "unpcklpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 },
206 { 0x14, "unpcklps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
207
208 { 0xCF, "gf2p8affineinvqb", T_66 | T_0F3A | T_W1 | T_EVEX | T_YMM | T_EW1 | T_SAE_Z | T_B64, true, false, 3 },
209 { 0xCE, "gf2p8affineqb", T_66 | T_0F3A | T_W1 | T_EVEX | T_YMM | T_EW1 | T_SAE_Z | T_B64, true, false, 3 },
210 { 0xCF, "gf2p8mulb", T_66 | T_0F38 | T_W0 | T_EVEX | T_YMM | T_EW0 | T_SAE_Z, false, false, 3 },
211 };
212 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
213 const Tbl *p = &tbl[i];
214 std::string type = type2String(p->type);
215 if (omitOnly) {
216 if (p->enableOmit) {
217 printf("void v%s(const Xmm& x, const Operand& op%s) { v%s(x, x, op%s); }\n", p->name, p->hasIMM ? ", uint8_t imm" : "", p->name, p->hasIMM ? ", imm" : "");
218 }
219 } else {
220 if (p->mode & 1) {
221 if (p->hasIMM) {
222 printf("void %s(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x%02X, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }\n", p->name, p->code);
223 } else {
224 printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x66, isXMM_XMMorMEM, NONE, 0x38); }\n", p->name, p->code);
225 }
226 }
227 if (p->mode & 2) {
228 printf("void v%s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n"
229 , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
230 }
231 }
232 }
233 }
234 }
235
/*
	print "void <name>(const Address& addr) { [db(prefix); ]opModM(addr, Reg<bit>(<ext>), code1, code2); }"
	name   : name of the generated function
	prefix : byte printed as a leading db(..) statement; 0 means no such statement
	ext    : value printed as the argument of Reg<bit>(..)
	code1  : printed as the third opModM argument
	code2  : printed as the fourth opModM argument (int, so values above 0xFF are printed as is)
	bit    : number appended to "Reg" in the generated text
*/
void putMemOp(const char *name, uint8_t prefix, uint8_t ext, uint8_t code1, int code2, int bit = 32)
{
	const bool hasPrefix = prefix != 0;
	printf("void %s(const Address& addr) { ", name);
	if (hasPrefix) {
		printf("db(0x%02X); ", prefix);
	}
	printf("opModM(addr, Reg%d(%d), 0x%02X, 0x%02X); }\n", bit, ext, code1, code2);
}
242
/*
	print "void <name>(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, code1, code2); }"
	name  : name of the generated function
	code1 : printed as the third opLoadSeg argument
	code2 : printed as the fourth opLoadSeg argument; defaults to NONE
*/
void putLoadSeg(const char *name, uint8_t code1, int code2 = NONE)
{
	// widen once so both codes go through the same %02X conversion as ints
	const int first = code1;
	const int second = code2;
	printf("void %s(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x%02X, 0x%02X); }\n", name, first, second);
}
247
put()248 void put()
249 {
250 const int NO = CodeGenerator::NONE;
251 {
252 char buf[16];
253 unsigned int v = VERSION;
254 if (v & 0xF) {
255 snprintf(buf, sizeof(buf), "%d.%02X%x", v >> 12, (v >> 4) & 0xFF, v & 0xF);
256 } else {
257 snprintf(buf, sizeof(buf), "%d.%02X", v >> 12, (v >> 4) & 0xFF);
258 }
259 printf("const char *getVersionString() const { return \"%s\"; }\n", buf);
260 }
261 const int B = 1 << 0;
262 const int W = 1 << 1;
263 const int D = 1 << 2;
264 const int Q = 1 << 3;
265 {
266 const struct Tbl {
267 uint8_t code;
268 const char *name;
269 } tbl[] = {
270 // MMX
271 { 0x6B, "packssdw" },
272 { 0x63, "packsswb" },
273 { 0x67, "packuswb" },
274
275 { 0xDB, "pand" },
276 { 0xDF, "pandn" },
277
278 { 0xF5, "pmaddwd" },
279 { 0xE4, "pmulhuw" },
280 { 0xE5, "pmulhw" },
281 { 0xD5, "pmullw" },
282
283 { 0xEB, "por" },
284
285 { 0x68, "punpckhbw" },
286 { 0x69, "punpckhwd" },
287 { 0x6A, "punpckhdq" },
288
289 { 0x60, "punpcklbw" },
290 { 0x61, "punpcklwd" },
291 { 0x62, "punpckldq" },
292
293 { 0xEF, "pxor" },
294
295 // MMX2
296 { 0xE0, "pavgb" },
297 { 0xE3, "pavgw" },
298 { 0xEE, "pmaxsw" },
299 { 0xDE, "pmaxub" },
300 { 0xEA, "pminsw" },
301 { 0xDA, "pminub" },
302 { 0xF6, "psadbw" },
303 //
304 { 0xD4, "paddq" },
305 { 0xF4, "pmuludq" },
306 { 0xFB, "psubq" },
307 };
308 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
309 const Tbl *p = &tbl[i];
310 printf("void %s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X); }\n"
311 , p->name, p->code);
312 }
313 }
314
315 {
316 const struct Tbl {
317 uint8_t code;
318 int mode;
319 const char *name;
320 } tbl[] = {
321 { 0xFC, B|W|D, "padd" },
322 { 0xEC, B|W , "padds" },
323 { 0xDC, B|W , "paddus" },
324 { 0x74, B|W|D, "pcmpeq" },
325 { 0x64, B|W|D, "pcmpgt" },
326 { 0xF0, W|D|Q, "psll" },
327 { 0xE0, W|D , "psra" },
328 { 0xD0, W|D|Q, "psrl" },
329 { 0xF8, B|W|D, "psub" },
330 { 0xE8, B|W , "psubs" },
331 { 0xD8, B|W , "psubus" },
332 };
333 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
334 const Tbl *p = &tbl[i];
335 static const char modTbl[][4] = {
336 "b", "w", "d", "q"
337 };
338 for (int j = 0; j < 4; j++) {
339 // B(0), W(1), D(2), Q(3)
340 if (!(p->mode & (1 << j))) continue;
341 printf("void %s%s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X); }\n"
342 , p->name, modTbl[j]
343 , p->code | j
344 );
345 }
346 }
347 }
348
349 {
350 const struct Tbl {
351 uint8_t code;
352 int ext;
353 int mode;
354 const char *name;
355 } tbl[] = {
356 { 0x70, 6, W|D|Q, "psll" },
357 { 0x70, 4, W|D , "psra" },
358 { 0x70, 2, W|D|Q, "psrl" },
359 };
360 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
361 const Tbl *p = &tbl[i];
362 static const char modTbl[][4] = {
363 "b", "w", "d", "q"
364 };
365 for (int j = 0; j < 4; j++) {
366 // B(0), W(1), D(2), Q(3)
367 if (!(p->mode & (1 << j))) continue;
368 printf("void %s%s(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x%02X, %d); }\n"
369 , p->name, modTbl[j]
370 , p->code | j
371 , p->ext
372 );
373 }
374 }
375 printf("void pslldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x%02X, %d); }\n", 0x73, 7);
376 printf("void psrldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x%02X, %d); }\n", 0x73, 3);
377 }
378
379 {
380 const struct Tbl {
381 uint8_t code;
382 uint8_t pref;
383 const char *name;
384 } tbl[] = {
385 { 0x70, 0, "pshufw" },
386 { 0x70, 0xF2, "pshuflw" },
387 { 0x70, 0xF3, "pshufhw" },
388 { 0x70, 0x66, "pshufd" },
389 };
390 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
391 const Tbl *p = &tbl[i];
392 printf("void %s(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x%02X, 0x%02X, imm8); }\n", p->name, p->code, p->pref);
393 }
394 }
395 {
396 const struct MmxTbl6 {
397 uint8_t code; // for (reg, reg/[mem])
398 uint8_t code2; // for ([mem], reg)
399 int pref;
400 const char *name;
401 } mmxTbl6[] = {
402 { 0x6F, 0x7F, 0x66, "movdqa" },
403 { 0x6F, 0x7F, 0xF3, "movdqu" },
404 // SSE2
405 { 0x28, 0x29, NO, "movaps" },
406 { 0x10, 0x11, 0xF3, "movss" },
407 { 0x10, 0x11, NO, "movups" },
408 { 0x28, 0x29, 0x66, "movapd" },
409 { 0x10, 0x11, 0xF2, "movsd" },
410 { 0x10, 0x11, 0x66, "movupd" },
411 };
412 for (size_t i = 0; i < NUM_OF_ARRAY(mmxTbl6); i++) {
413 const MmxTbl6 *p = &mmxTbl6[i];
414 printf("void %s(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x%02X, 0x%02X); }\n", p->name, p->code, p->pref);
415 printf("void %s(const Address& addr, const Xmm& xmm) { ", p->name);
416 if (p->pref != NO) printf("db(0x%02X); ", p->pref);
417 printf("opModM(addr, xmm, 0x0F, 0x%02X); }\n", p->code2);
418 }
419 }
420 {
421 enum {
422 PS = 1 << 0,
423 SS = 1 << 1,
424 PD = 1 << 2,
425 SD = 1 << 3
426 };
427 const struct {
428 int code;
429 const char *name;
430 } sufTbl[] = {
431 { NO, "ps" },
432 { 0xF3, "ss" },
433 { 0x66, "pd" },
434 { 0xF2, "sd" },
435 };
436 const struct Tbl {
437 uint8_t code;
438 int mode;
439 const char *name;
440 bool hasImm;
441 } tbl[] = {
442 { 0x58, PS|SS|PD|SD, "add" },
443 { 0x55, PS|PD , "andn" },
444 { 0x54, PS|PD , "and" },
445 { 0xC2, PS|SS|PD|SD, "cmp", true },
446 { 0x5E, PS|SS|PD|SD, "div" },
447 { 0x5F, PS|SS|PD|SD, "max" },
448 { 0x5D, PS|SS|PD|SD, "min" },
449 { 0x59, PS|SS|PD|SD, "mul" },
450 { 0x56, PS|PD , "or" },
451 { 0x53, PS|SS , "rcp" },
452 { 0x52, PS|SS , "rsqrt" },
453 { 0xC6, PS|PD , "shuf", true },
454 { 0x51, PS|SS|PD|SD, "sqrt" },
455 { 0x5C, PS|SS|PD|SD, "sub" },
456 { 0x15, PS|PD , "unpckh" },
457 { 0x14, PS|PD , "unpckl" },
458 { 0x57, PS|PD , "xor" },
459 //
460 };
461 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
462 const Tbl *p = &tbl[i];
463 for (size_t j = 0; j < NUM_OF_ARRAY(sufTbl); j++) {
464 if (!(p->mode & (1 << j))) continue;
465 if (p->hasImm) {
466 // don't change uint8_t to int because NO is not in byte
467 printf("void %s%s(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0x%2X, 0x%02X, isXMM_XMMorMEM, imm8); }\n", p->name, sufTbl[j].name, p->code, sufTbl[j].code);
468 } else {
469 printf("void %s%s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%2X, 0x%02X, isXMM_XMMorMEM); }\n", p->name, sufTbl[j].name, p->code, sufTbl[j].code);
470 }
471 }
472 }
473 }
474 {
475 // (XMM, XMM)
476 const struct Tbl {
477 uint8_t code;
478 uint8_t pref;
479 const char *name;
480 } tbl[] = {
481 { 0xF7, 0x66, "maskmovdqu" },
482 { 0x12, 0 , "movhlps" },
483 { 0x16, 0 , "movlhps" },
484 };
485 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
486 const Tbl *p = &tbl[i];
487 printf("void %s(const Xmm& reg1, const Xmm& reg2) { ", p->name);
488 if (p->pref) printf("db(0x%02X); ", p->pref);
489 printf(" opModR(reg1, reg2, 0x0F, 0x%02X); }\n", p->code);
490 }
491 }
492 {
493 // (XMM, XMM|MEM)
494 const struct Tbl {
495 uint8_t code;
496 int pref;
497 const char *name;
498 } tbl[] = {
499 { 0x6D, 0x66, "punpckhqdq" },
500 { 0x6C, 0x66, "punpcklqdq" },
501
502 { 0x2F, NO , "comiss" },
503 { 0x2E, NO , "ucomiss" },
504 { 0x2F, 0x66, "comisd" },
505 { 0x2E, 0x66, "ucomisd" },
506
507 { 0x5A, 0x66, "cvtpd2ps" },
508 { 0x5A, NO , "cvtps2pd" },
509 { 0x5A, 0xF2, "cvtsd2ss" },
510 { 0x5A, 0xF3, "cvtss2sd" },
511 { 0xE6, 0xF2, "cvtpd2dq" },
512 { 0xE6, 0x66, "cvttpd2dq" },
513 { 0xE6, 0xF3, "cvtdq2pd" },
514 { 0x5B, 0x66, "cvtps2dq" },
515 { 0x5B, 0xF3, "cvttps2dq" },
516 { 0x5B, NO , "cvtdq2ps" },
517 };
518 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
519 const Tbl *p = &tbl[i];
520 printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x%02X, isXMM_XMMorMEM); }\n", p->name, p->code, p->pref);
521 }
522 }
523
524 {
525 // special type
526 const struct Tbl {
527 uint8_t code;
528 int pref;
529 const char *name;
530 const char *cond;
531 } tbl[] = {
532 { 0x2A, NO , "cvtpi2ps", "isXMM_MMXorMEM" },
533 { 0x2D, NO , "cvtps2pi", "isMMX_XMMorMEM" },
534 { 0x2A, 0xF3, "cvtsi2ss", "isXMM_REG32orMEM" },
535 { 0x2D, 0xF3, "cvtss2si", "isREG32_XMMorMEM" },
536 { 0x2C, NO , "cvttps2pi", "isMMX_XMMorMEM" },
537 { 0x2C, 0xF3, "cvttss2si", "isREG32_XMMorMEM" },
538 { 0x2A, 0x66, "cvtpi2pd", "isXMM_MMXorMEM" },
539 { 0x2D, 0x66, "cvtpd2pi", "isMMX_XMMorMEM" },
540 { 0x2A, 0xF2, "cvtsi2sd", "isXMM_REG32orMEM" },
541 { 0x2D, 0xF2, "cvtsd2si", "isREG32_XMMorMEM" },
542 { 0x2C, 0x66, "cvttpd2pi", "isMMX_XMMorMEM" },
543 { 0x2C, 0xF2, "cvttsd2si", "isREG32_XMMorMEM" },
544 };
545 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
546 const Tbl *p = &tbl[i];
547 printf("void %s(const Operand& reg, const Operand& op) { opGen(reg, op, 0x%02X, 0x%02X, %s); }\n", p->name, p->code, p->pref, p->cond);
548 }
549 }
550 {
551 // prefetch
552 const struct Tbl {
553 int ext;
554 const char *name;
555 int code;
556 } tbl[] = {
557 { 1, "t0", 0x18},
558 { 2, "t1", 0x18},
559 { 3, "t2", 0x18},
560 { 0, "nta", 0x18},
561 { 2, "wt1", 0x0D},
562 { 1, "w", 0x0D},
563 };
564 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
565 const Tbl *p = &tbl[i];
566 printf("void prefetch%s(const Address& addr) { opModM(addr, Reg32(%d), 0x0F, 0x%02X); }\n", p->name, p->ext, p->code);
567 }
568 }
569 {
570 const struct Tbl {
571 uint8_t code;
572 int pref;
573 const char *name;
574 } tbl[] = {
575 { 0x16, NO, "movhps" },
576 { 0x12, NO, "movlps" },
577 { 0x16, 0x66, "movhpd" },
578 { 0x12, 0x66, "movlpd" },
579 };
580 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
581 const Tbl *p = &tbl[i];
582 printf("void %s(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x%02X, 0x%02X); }\n", p->name, p->code, p->pref);
583 }
584 }
585 {
586 // cmov
587 const struct Tbl {
588 uint8_t ext;
589 const char *name;
590 } tbl[] = {
591 { 0, "o" },
592 { 1, "no" },
593 { 2, "b" },
594 { 2, "c" },
595 { 2, "nae" },
596 { 3, "nb" },
597 { 3, "ae" },
598 { 3, "nc" },
599 { 4, "e" },
600 { 4, "z" },
601 { 5, "ne" },
602 { 5, "nz" },
603 { 6, "be" },
604 { 6, "na" },
605 { 7, "nbe" },
606 { 7, "a" },
607 { 8, "s" },
608 { 9, "ns" },
609 { 10, "p" },
610 { 10, "pe" },
611 { 11, "np" },
612 { 11, "po" },
613 { 12, "l" },
614 { 12, "nge" },
615 { 13, "nl" },
616 { 13, "ge" },
617 { 14, "le" },
618 { 14, "ng" },
619 { 15, "nle" },
620 { 15, "g" },
621 };
622 const char *msg = "//-V524"; // disable warning of PVS-Studio
623 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
624 const Tbl *p = &tbl[i];
625 printf("void cmov%s(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | %d); }%s\n", p->name, p->ext, msg);
626 printf("void j%s(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg);
627 printf("void j%s(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg);
628 printf("void j%s(const char *label, LabelType type = T_AUTO) { j%s(std::string(label), type); }%s\n", p->name, p->name, msg);
629 printf("void j%s(const void *addr) { opJmpAbs(addr, T_NEAR, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg);
630 printf("void set%s(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | %d); }%s\n", p->name, p->ext, msg);
631 }
632 }
633 {
634 const struct Tbl {
635 const char *name;
636 uint8_t code;
637 } tbl[] = {
638 { "loop", 0xE2 },
639 { "loope", 0xE1 },
640 { "loopne", 0xE0 },
641 };
642 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
643 const Tbl *p = &tbl[i];
644 printf("void %s(std::string label) { opJmp(label, T_SHORT, 0x%02X, 0, 0); }\n", p->name, p->code);
645 printf("void %s(const Label& label) { opJmp(label, T_SHORT, 0x%02X, 0, 0); }\n", p->name, p->code);
646 printf("void %s(const char *label) { %s(std::string(label)); }\n", p->name, p->name);
647 }
648 }
649 ////////////////////////////////////////////////////////////////
650 {
651 const GenericTbl tbl[] = {
652 { "bnd", 0xf2 }, /* 0xf2 prefix for MPX */
653 { "cbw", 0x66, 0x98 },
654 { "cdq", 0x99 },
655 { "clc", 0xF8 },
656 { "cld", 0xFC },
657 { "cli", 0xFA },
658 { "cmc", 0xF5 },
659
660 { "cpuid", 0x0F, 0xA2 },
661 { "cwd", 0x66, 0x99 },
662 { "cwde", 0x98 },
663 { "cmpsb", 0xA6 },
664 { "cmpsw", 0x66, 0xA7 },
665 { "cmpsd", 0xA7 },
666 { "endbr32", 0xF3, 0x0F, 0x1E, 0xFB },
667 { "endbr64", 0xF3, 0x0F, 0x1E, 0xFA },
668 { "hlt", 0xF4 },
669 { "int3", 0xCC },
670 { "scasb", 0xAE },
671 { "scasw", 0x66, 0xAF },
672 { "scasd", 0xAF },
673 { "movsb", 0xA4 },
674 { "leave", 0xC9 },
675 { "lodsb", 0xAC },
676 { "lodsw", 0x66, 0xAD },
677 { "lodsd", 0xAD },
678 { "movsw", 0x66, 0xA5 },
679 { "movsd", 0xA5 },
680 { "outsb", 0x6E },
681 { "outsw", 0x66, 0x6F },
682 { "outsd", 0x6F },
683 { "stosb", 0xAA },
684 { "stosw", 0x66, 0xAB },
685 { "stosd", 0xAB },
686 { "rep", 0xF3 },
687 { "repe", 0xF3 },
688 { "repz", 0xF3 },
689 { "repne", 0xF2 },
690 { "repnz", 0xF2 },
691
692 { "lahf", 0x9F },
693 { "lock", 0xF0 },
694
695 { "sahf", 0x9E },
696 { "stc", 0xF9 },
697 { "std", 0xFD },
698 { "sti", 0xFB },
699 { "sysenter", 0x0F, 0x34 },
700 { "sysexit", 0x0F, 0x35 },
701
702 { "emms", 0x0F, 0x77 },
703 { "pause", 0xF3, 0x90 },
704 { "sfence", 0x0F, 0xAE, 0xF8 },
705 { "lfence", 0x0F, 0xAE, 0xE8 },
706 { "mfence", 0x0F, 0xAE, 0xF0 },
707 { "monitor", 0x0F, 0x01, 0xC8 },
708 { "mwait", 0x0F, 0x01, 0xC9 },
709
710 { "rdmsr", 0x0F, 0x32 },
711 { "rdpmc", 0x0F, 0x33 },
712 { "rdtsc", 0x0F, 0x31 },
713 { "rdtscp", 0x0F, 0x01, 0xF9 },
714 { "ud2", 0x0F, 0x0B },
715 { "wait", 0x9B },
716 { "fwait", 0x9B },
717 { "wbinvd", 0x0F, 0x09 },
718 { "wrmsr", 0x0F, 0x30 },
719 { "xlatb", 0xD7 },
720
721 { "popf", 0x9D },
722 { "pushf", 0x9C },
723 { "stac", 0x0F, 0x01, 0xCB },
724
725 { "vzeroall", 0xC5, 0xFC, 0x77 },
726 { "vzeroupper", 0xC5, 0xF8, 0x77 },
727 { "xgetbv", 0x0F, 0x01, 0xD0 },
728
729 // FPU
730 { "f2xm1", 0xD9, 0xF0 },
731 { "fabs", 0xD9, 0xE1 },
732 { "faddp", 0xDE, 0xC1 },
733 { "fchs", 0xD9, 0xE0 },
734 { "fclex", 0x9B, 0xDB, 0xE2 },
735 { "fnclex", 0xDB, 0xE2 },
736 { "fcom", 0xD8, 0xD1 },
737 { "fcomp", 0xD8, 0xD9 },
738 { "fcompp", 0xDE, 0xD9 },
739 { "fcos", 0xD9, 0xFF },
740 { "fdecstp", 0xD9, 0xF6 },
741 { "fdivp", 0xDE, 0xF9 },
742 { "fdivrp", 0xDE, 0xF1 },
743 { "fincstp", 0xD9, 0xF7 },
744 { "finit", 0x9B, 0xDB, 0xE3 },
745 { "fninit", 0xDB, 0xE3 },
746 { "fld1", 0xD9, 0xE8 },
747 { "fldl2t", 0xD9, 0xE9 },
748 { "fldl2e", 0xD9, 0xEA },
749 { "fldpi", 0xD9, 0xEB },
750 { "fldlg2", 0xD9, 0xEC },
751 { "fldln2", 0xD9, 0xED },
752 { "fldz", 0xD9, 0xEE },
753 { "fmulp", 0xDE, 0xC9 },
754 { "fnop", 0xD9, 0xD0 },
755 { "fpatan", 0xD9, 0xF3 },
756 { "fprem", 0xD9, 0xF8 },
757 { "fprem1", 0xD9, 0xF5 },
758 { "fptan", 0xD9, 0xF2 },
759 { "frndint", 0xD9, 0xFC },
760 { "fscale", 0xD9, 0xFD },
761 { "fsin", 0xD9, 0xFE },
762 { "fsincos", 0xD9, 0xFB },
763 { "fsqrt", 0xD9, 0xFA },
764 { "fsubp", 0xDE, 0xE9 },
765 { "fsubrp", 0xDE, 0xE1 },
766 { "ftst", 0xD9, 0xE4 },
767 { "fucom", 0xDD, 0xE1 },
768 { "fucomp", 0xDD, 0xE9 },
769 { "fucompp", 0xDA, 0xE9 },
770 { "fxam", 0xD9, 0xE5 },
771 { "fxch", 0xD9, 0xC9 },
772 { "fxtract", 0xD9, 0xF4 },
773 { "fyl2x", 0xD9, 0xF1 },
774 { "fyl2xp1", 0xD9, 0xF9 },
775
776 // AMD Zen
777 { "monitorx", 0x0F, 0x01, 0xFA },
778 { "mwaitx", 0x0F, 0x01, 0xFB },
779 { "clzero", 0x0F, 0x01, 0xFC },
780 };
781 putGeneric(tbl, NUM_OF_ARRAY(tbl));
782 puts("void enter(uint16_t x, uint8_t y) { db(0xC8); dw(x); db(y); }");
783 puts("void int_(uint8_t x) { db(0xCD); db(x); }");
784 putLoadSeg("lss", 0x0F, 0xB2);
785 putLoadSeg("lfs", 0x0F, 0xB4);
786 putLoadSeg("lgs", 0x0F, 0xB5);
787 }
788 {
789 const struct Tbl {
790 uint8_t code; // (reg, reg)
791 uint8_t ext; // (reg, imm)
792 const char *name;
793 } tbl[] = {
794 { 0x10, 2, "adc" },
795 { 0x00, 0, "add" },
796 { 0x20, 4, "and_" },
797 { 0x38, 7, "cmp" },
798 { 0x08, 1, "or_" },
799 { 0x18, 3, "sbb" },
800 { 0x28, 5, "sub" },
801 { 0x30, 6, "xor_" },
802 };
803 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
804 const Tbl *p = &tbl[i];
805 printf("void %s(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x%02X); }\n", p->name, p->code);
806 printf("void %s(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x%02X, %d); }\n", p->name, p->code, p->ext);
807 }
808 }
809
810 {
811 const struct Tbl {
812 uint8_t code;
813 uint8_t ext;
814 const char *name;
815 } tbl[] = {
816 { 0x48, 1, "dec" },
817 { 0x40, 0, "inc" },
818 };
819 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
820 const Tbl *p = &tbl[i];
821 printf("void %s(const Operand& op) { opIncDec(op, 0x%02X, %d); }\n", p->name, p->code, p->ext);
822 }
823 }
824 {
825 const struct Tbl {
826 uint8_t code;
827 uint8_t ext;
828 const char *name;
829 } tbl[] = {
830 { 0xa3, 4, "bt" },
831 { 0xab, 5, "bts" },
832 { 0xb3, 6, "btr" },
833 { 0xbb, 7, "btc" },
834 };
835 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
836 const Tbl *p = &tbl[i];
837 printf("void %s(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0x%02X); }\n", p->name, p->code);
838 printf("void %s(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, %d, 0x0f, 0xba, NONE, false, 1); db(imm); }\n", p->name, p->ext);
839 }
840 }
841 {
842 const struct Tbl {
843 uint8_t code;
844 uint8_t ext;
845 const char *name;
846 } tbl[] = {
847 { 0xF6, 6, "div" },
848 { 0xF6, 7, "idiv" },
849 { 0xF6, 5, "imul" },
850 { 0xF6, 4, "mul" },
851 { 0xF6, 3, "neg" },
852 { 0xF6, 2, "not_" },
853 };
854 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
855 const Tbl *p = &tbl[i];
856 const std::string name = p->name;
857 printf("void %s(const Operand& op) { opR_ModM(op, 0, %d, 0x%02X); }\n", p->name, p->ext, p->code);
858 }
859 }
860 {
861 const struct Tbl {
862 const char *name;
863 uint8_t ext;
864 } tbl[] = {
865 { "rcl", 2 },
866 { "rcr", 3 },
867 { "rol", 0 },
868 { "ror", 1 },
869 { "sar", 7 },
870 { "shl", 4 },
871 { "shr", 5 },
872
873 { "sal", 4 },
874 };
875 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
876 const Tbl *p = &tbl[i];
877 printf("void %s(const Operand& op, int imm) { opShift(op, imm, %d); }\n", p->name, p->ext);
878 printf("void %s(const Operand& op, const Reg8& _cl) { opShift(op, _cl, %d); }\n", p->name, p->ext);
879 }
880 }
881 {
882 const struct Tbl {
883 const char *name;
884 uint8_t code;
885 } tbl[] = {
886 { "shld", 0xA4 },
887 { "shrd", 0xAC },
888 };
889 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
890 const Tbl *p = &tbl[i];
891 printf("void %s(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(op, reg, imm, 0x%02X); }\n", p->name, p->code);
892 printf("void %s(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(op, reg, 0, 0x%02X, &_cl); }\n", p->name, p->code);
893 }
894 }
895 {
896 const struct Tbl {
897 const char *name;
898 uint8_t code;
899 } tbl[] = {
900 { "bsf", 0xBC },
901 { "bsr", 0xBD },
902 };
903 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
904 const Tbl *p = &tbl[i];
905 printf("void %s(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x%02X); }\n", p->name, p->code);
906 }
907 }
908 {
909 const struct Tbl {
910 const char *name;
911 uint8_t code;
912 } tbl[] = {
913 { "popcnt", 0xB8 },
914 { "tzcnt", 0xBC },
915 { "lzcnt", 0xBD },
916 };
917 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
918 const Tbl *p = &tbl[i];
919 printf("void %s(const Reg®, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0x%02X); }\n", p->name, p->code);
920 }
921 }
922 // SSSE3
923 {
924 const struct Tbl {
925 uint8_t code;
926 const char *name;
927 } tbl[] = {
928 { 0x00, "pshufb" },
929 { 0x01, "phaddw" },
930 { 0x02, "phaddd" },
931 { 0x03, "phaddsw" },
932 { 0x04, "pmaddubsw" },
933 { 0x05, "phsubw" },
934 { 0x06, "phsubd" },
935 { 0x07, "phsubsw" },
936 { 0x08, "psignb" },
937 { 0x09, "psignw" },
938 { 0x0a, "psignd" },
939 { 0x0b, "pmulhrsw" },
940 { 0x1c, "pabsb" },
941 { 0x1d, "pabsw" },
942 { 0x1e, "pabsd" },
943 };
944 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
945 const Tbl *p = &tbl[i];
946 printf("void %s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X, 0x66, NONE, 0x38); }\n", p->name, p->code);
947 }
948 printf("void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast<uint8_t>(imm), 0x3a); }\n");
949 }
950 {
951 const struct Tbl {
952 const char *name;
953 uint8_t code;
954 } tbl[] = {
955 { "pclmullqlqdq", 0 },
956 { "pclmulhqlqdq", 1 },
957 { "pclmullqhdq", 0x10 },
958 { "pclmulhqhdq", 0x11 },
959 };
960 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
961 const Tbl *p = &tbl[i];
962 printf("void %s(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x%02X); }\n", p->name, p->code);
963 }
964 }
965 {
966 const struct Tbl {
967 uint8_t code1;
968 int code2;
969 uint8_t ext;
970 const char *name;
971 uint8_t prefix;
972 } tbl[] = {
973 { 0x0F, 0xAE, 2, "ldmxcsr", 0 },
974 { 0x0F, 0xAE, 3, "stmxcsr", 0 },
975 { 0x0F, 0xAE, 7, "clflush", 0 },
976 { 0x0F, 0xAE, 7, "clflushopt", 0x66 },
977 { 0xDF, NONE, 4, "fbld", 0 },
978 { 0xDF, NONE, 6, "fbstp", 0 },
979 { 0xD9, NONE, 5, "fldcw", 0 },
980 { 0xD9, NONE, 4, "fldenv", 0 },
981 { 0xDD, NONE, 4, "frstor", 0 },
982 { 0xDD, NONE, 6, "fsave", 0x9B },
983 { 0xDD, NONE, 6, "fnsave", 0 },
984 { 0xD9, NONE, 7, "fstcw", 0x9B },
985 { 0xD9, NONE, 7, "fnstcw", 0 },
986 { 0xD9, NONE, 6, "fstenv", 0x9B },
987 { 0xD9, NONE, 6, "fnstenv", 0 },
988 { 0xDD, NONE, 7, "fstsw", 0x9B },
989 { 0xDD, NONE, 7, "fnstsw", 0 },
990 { 0x0F, 0xAE, 1, "fxrstor", 0 },
991 };
992 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
993 const Tbl *p = &tbl[i];
994 putMemOp(p->name, p->prefix, p->ext, p->code1, p->code2);
995 }
996 puts("void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x9B); db(0xDF); db(0xE0); }");
997 puts("void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xDF); db(0xE0); }");
998 }
999 {
1000 const struct Tbl {
1001 uint8_t code;
1002 const char *name;
1003 } tbl[] = {
1004 { 0x2B, "movntpd" },
1005 { 0xE7, "movntdq" },
1006 };
1007 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1008 const Tbl *p = &tbl[i];
1009 // cast xmm register to 16bit register to put 0x66
1010 printf("void %s(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0x%02X); }\n", p->name, p->code);
1011 }
1012 }
1013 {
1014 const struct Tbl {
1015 uint8_t code;
1016 const char *name;
1017 } tbl[] = {
1018 { 0xBE, "movsx" },
1019 { 0xB6, "movzx" },
1020 };
1021 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1022 const Tbl *p = &tbl[i];
1023 printf("void %s(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0x%02X); }\n", p->name, p->code);
1024 }
1025 }
1026 { // in/out
1027 puts("void in_(const Reg& a, uint8_t v) { opInOut(a, 0xE4, v); }");
1028 puts("void in_(const Reg& a, const Reg& d) { opInOut(a, d, 0xEC); }");
1029 puts("void out_(uint8_t v, const Reg& a) { opInOut(a, 0xE6, v); }");
1030 puts("void out_(const Reg& d, const Reg& a) { opInOut(a, d, 0xEE); }");
1031 }
1032 // mpx
1033 {
1034 puts("void bndcl(const BoundsReg& bnd, const Operand& op) { db(0xF3); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }");
1035 puts("void bndcu(const BoundsReg& bnd, const Operand& op) { db(0xF2); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }");
1036 puts("void bndcn(const BoundsReg& bnd, const Operand& op) { db(0xF2); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1B, NONE, !op.isMEM()); }");
1037 puts("void bndldx(const BoundsReg& bnd, const Address& addr) { opMIB(addr, bnd, 0x0F, 0x1A); }");
1038 puts("void bndmk(const BoundsReg& bnd, const Address& addr) { db(0xF3); opModM(addr, bnd, 0x0F, 0x1B); }");
1039 puts("void bndmov(const BoundsReg& bnd, const Operand& op) { db(0x66); opModRM(bnd, op, op.isBNDREG(), op.isMEM(), 0x0F, 0x1A); }");
1040 puts("void bndmov(const Address& addr, const BoundsReg& bnd) { db(0x66); opModM(addr, bnd, 0x0F, 0x1B); }");
1041 puts("void bndstx(const Address& addr, const BoundsReg& bnd) { opMIB(addr, bnd, 0x0F, 0x1B); }");
1042 }
1043 // misc
1044 {
1045 puts("void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModM(addr, reg, 0x8D); }");
1046 puts("void bswap(const Reg32e& reg) { opModR(Reg32(1), reg, 0x0F); }");
1047 puts("void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }");
1048 puts("void retf(int imm = 0) { if (imm) { db(0xCA); dw(imm); } else { db(0xCB); } }");
1049
1050 puts("void xadd(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xC0 | (reg.isBit(8) ? 0 : 1)); }");
1051 puts("void cmpxchg(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xB0 | (reg.isBit(8) ? 0 : 1)); }");
1052 puts("void movbe(const Reg& reg, const Address& addr) { opModM(addr, reg, 0x0F, 0x38, 0xF0); }");
1053 puts("void movbe(const Address& addr, const Reg& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF1); }");
1054 puts("void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }");
1055 puts("void adox(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0xF3, isREG32_REG32orMEM, NONE, 0x38); }");
1056 puts("void cmpxchg8b(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xC7); }");
1057
1058 puts("void pextrw(const Operand& op, const Mmx& xmm, uint8_t imm) { opExt(op, xmm, 0x15, imm, true); }");
1059 puts("void pextrb(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x14, imm); }");
1060 puts("void pextrd(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x16, imm); }");
1061 puts("void extractps(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x17, imm); }");
1062 puts("void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(mmx, op, 0xC4, mmx.isXMM() ? 0x66 : NONE, 0, imm); }");
1063 puts("void insertps(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, 0x3A); }");
1064 puts("void pinsrb(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x20, 0x66, isXMM_REG32orMEM, imm, 0x3A); }");
1065 puts("void pinsrd(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x22, 0x66, isXMM_REG32orMEM, imm, 0x3A); }");
1066
1067 puts("void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(reg, mmx, 0x0F, 0xD7); }");
1068 puts("void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModR(reg1, reg2, 0x0F, 0xF7); }");
1069 puts("void movmskps(const Reg32e& reg, const Xmm& xmm) { opModR(reg, xmm, 0x0F, 0x50); }");
1070 puts("void movmskpd(const Reg32e& reg, const Xmm& xmm) { db(0x66); movmskps(reg, xmm); }");
1071 puts("void movntps(const Address& addr, const Xmm& xmm) { opModM(addr, Mmx(xmm.getIdx()), 0x0F, 0x2B); }");
1072 puts("void movntdqa(const Xmm& xmm, const Address& addr) { db(0x66); opModM(addr, xmm, 0x0F, 0x38, 0x2A); }");
1073 puts("void lddqu(const Xmm& xmm, const Address& addr) { db(0xF2); opModM(addr, xmm, 0x0F, 0xF0); }");
1074 puts("void movnti(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0xC3); }");
1075 puts("void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModM(addr, mmx, 0x0F, 0xE7); }");
1076
1077 puts("void movd(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, 0x7E); }");
1078 puts("void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }");
1079 puts("void movd(const Mmx& mmx, const Address& addr) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, 0x6E); }");
1080 puts("void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }");
1081 puts("void movq2dq(const Xmm& xmm, const Mmx& mmx) { db(0xF3); opModR(xmm, mmx, 0x0F, 0xD6); }");
1082 puts("void movdq2q(const Mmx& mmx, const Xmm& xmm) { db(0xF2); opModR(mmx, xmm, 0x0F, 0xD6); }");
1083 puts("void movq(const Mmx& mmx, const Operand& op) { if (mmx.isXMM()) db(0xF3); opModRM(mmx, op, (mmx.getKind() == op.getKind()), op.isMEM(), 0x0F, mmx.isXMM() ? 0x7E : 0x6F); }");
1084 puts("void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, mmx.isXMM() ? 0xD6 : 0x7F); }");
1085 puts("void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }");
1086 puts("void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }");
1087 puts("void crc32(const Reg32e& reg, const Operand& op) { if (reg.isBit(32) && op.isBit(16)) db(0x66); db(0xF2); opModRM(reg, op, op.isREG(), op.isMEM(), 0x0F, 0x38, 0xF0 | (op.isBit(8) ? 0 : 1)); }");
1088 }
1089 {
1090 const struct Tbl {
1091 uint8_t m16;
1092 uint8_t m32;
1093 uint8_t m64;
1094 uint8_t ext;
1095 const char *name;
1096 uint8_t m64ext;
1097 } tbl[] = {
1098 { 0x00, 0xD8, 0xDC, 0, "fadd" },
1099 { 0xDE, 0xDA, 0x00, 0, "fiadd" },
1100 { 0x00, 0xD8, 0xDC, 2, "fcom" },
1101 { 0x00, 0xD8, 0xDC, 3, "fcomp" },
1102 { 0x00, 0xD8, 0xDC, 6, "fdiv" },
1103 { 0xDE, 0xDA, 0x00, 6, "fidiv" },
1104 { 0x00, 0xD8, 0xDC, 7, "fdivr" },
1105 { 0xDE, 0xDA, 0x00, 7, "fidivr" },
1106 { 0xDE, 0xDA, 0x00, 2, "ficom" },
1107 { 0xDE, 0xDA, 0x00, 3, "ficomp" },
1108 { 0xDF, 0xDB, 0xDF, 0, "fild", 5 },
1109 { 0xDF, 0xDB, 0x00, 2, "fist" },
1110 { 0xDF, 0xDB, 0xDF, 3, "fistp", 7 },
1111 { 0xDF, 0xDB, 0xDD, 1, "fisttp" },
1112 { 0x00, 0xD9, 0xDD, 0, "fld" },
1113 { 0x00, 0xD8, 0xDC, 1, "fmul" },
1114 { 0xDE, 0xDA, 0x00, 1, "fimul" },
1115 { 0x00, 0xD9, 0xDD, 2, "fst" },
1116 { 0x00, 0xD9, 0xDD, 3, "fstp" },
1117 { 0x00, 0xD8, 0xDC, 4, "fsub" },
1118 { 0xDE, 0xDA, 0x00, 4, "fisub" },
1119 { 0x00, 0xD8, 0xDC, 5, "fsubr" },
1120 { 0xDE, 0xDA, 0x00, 5, "fisubr" },
1121 };
1122 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1123 const Tbl *p = &tbl[i];
1124 printf("void %s(const Address& addr) { opFpuMem(addr, 0x%02X, 0x%02X, 0x%02X, %d, %d); }\n", p->name, p->m16, p->m32, p->m64, p->ext, p->m64ext);
1125 }
1126 }
1127 {
1128 const struct Tbl {
1129 uint32_t code1;
1130 uint32_t code2;
1131 const char *name;
1132 } tbl[] = {
1133 { 0xD8C0, 0xDCC0, "fadd" },
1134 { 0x0000, 0xDEC0, "faddp" },
1135
1136 { 0xDAC0, 0x00C0, "fcmovb" },
1137 { 0xDAC8, 0x00C8, "fcmove" },
1138 { 0xDAD0, 0x00D0, "fcmovbe" },
1139 { 0xDAD8, 0x00D8, "fcmovu" },
1140 { 0xDBC0, 0x00C0, "fcmovnb" },
1141 { 0xDBC8, 0x00C8, "fcmovne" },
1142 { 0xDBD0, 0x00D0, "fcmovnbe" },
1143 { 0xDBD8, 0x00D8, "fcmovnu" },
1144
1145 { 0xDBF0, 0x00F0, "fcomi" },
1146 { 0xDFF0, 0x00F0, "fcomip" },
1147 { 0xDBE8, 0x00E8, "fucomi" },
1148 { 0xDFE8, 0x00E8, "fucomip" },
1149
1150 { 0xD8F0, 0xDCF8, "fdiv" },
1151 { 0x0000, 0xDEF8, "fdivp" },
1152 { 0xD8F8, 0xDCF0, "fdivr" },
1153 { 0x0000, 0xDEF0, "fdivrp" },
1154 { 0xD8C8, 0xDCC8, "fmul" },
1155 { 0x0000, 0xDEC8, "fmulp" },
1156 { 0xD8E0, 0xDCE8, "fsub" },
1157 { 0x0000, 0xDEE8, "fsubp" },
1158 { 0xD8E8, 0xDCE0, "fsubr" },
1159 { 0x0000, 0xDEE0, "fsubrp" },
1160 };
1161 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1162 const Tbl *p = &tbl[i];
1163 printf("void %s(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x%04X, 0x%04X); }\n", p->name, p->code1, p->code2);
1164 // omit st0 version(like nasm)
1165 if (p->code1) {
1166 printf("void %s(const Fpu& reg1) { opFpuFpu(st0, reg1, 0x%04X, 0x%04X); }\n", p->name, p->code1, p->code2);
1167 } else {
1168 printf("void %s(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x%04X, 0x%04X); }\n", p->name, p->code1, p->code2);
1169 }
1170 }
1171 }
1172 {
1173 const struct Tbl {
1174 uint8_t code1;
1175 uint8_t code2;
1176 const char *name;
1177 } tbl[] = {
1178 { 0xD8, 0xD0, "fcom" },
1179 { 0xD8, 0xD8, "fcomp" },
1180 { 0xDD, 0xC0, "ffree" },
1181 { 0xD9, 0xC0, "fld" },
1182 { 0xDD, 0xD0, "fst" },
1183 { 0xDD, 0xD8, "fstp" },
1184 { 0xDD, 0xE0, "fucom" },
1185 { 0xDD, 0xE8, "fucomp" },
1186 { 0xD9, 0xC8, "fxch" },
1187 };
1188 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1189 const Tbl *p = &tbl[i];
1190 printf("void %s(const Fpu& reg) { opFpu(reg, 0x%02X, 0x%02X); }\n", p->name, p->code1, p->code2);
1191 }
1192 }
1193 // AVX
1194 { // pd, ps, sd, ss
1195 const struct Tbl {
1196 uint8_t code;
1197 const char *name;
1198 bool only_pd_ps;
1199 } tbl[] = {
1200 { 0x58, "add", false },
1201 { 0x5C, "sub", false },
1202 { 0x59, "mul", false },
1203 { 0x5E, "div", false },
1204 { 0x5F, "max", false },
1205 { 0x5D, "min", false },
1206 { 0x54, "and", true },
1207 { 0x55, "andn", true },
1208 { 0x56, "or", true },
1209 { 0x57, "xor", true },
1210 };
1211 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1212 const Tbl *p = &tbl[i];
1213 printf("void v%spd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x%02X); }\n", p->name, p->code);
1214 printf("void v%sps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x%02X); }\n", p->name, p->code);
1215 if (p->only_pd_ps) continue;
1216 printf("void v%ssd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x%02X); }\n", p->name, p->code);
1217 printf("void v%sss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x%02X); }\n", p->name, p->code);
1218 }
1219 }
1220 putX_X_XM(false);
1221
1222 // (x, x/m[, imm]) or (y, y/m[, imm])
1223 {
1224 const struct Tbl {
1225 uint8_t code;
1226 const char *name;
1227 int type;
1228 bool hasIMM;
1229 int mode; // 1 : SSE, 2 : AVX, 3 : SSE + AVX
1230 } tbl[] = {
1231 { 0x15, "blendvpd", T_0F38 | T_66, false, 1 },
1232 { 0x14, "blendvps", T_0F38 | T_66, false, 1 },
1233 { 0x10, "pblendvb", T_0F38 | T_66, false, 1 },
1234 { 0xDF, "aeskeygenassist", T_0F3A | T_66, true, 3 },
1235 { 0xDB, "aesimc", T_0F38 | T_66 | T_W0, false, 3 },
1236 { 0x09, "roundpd", T_0F3A | T_66 | T_YMM, true, 3 },
1237 { 0x08, "roundps", T_0F3A | T_66 | T_YMM, true, 3 },
1238 { 0x05, "permilpd", T_0F3A | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, true, 2 },
1239 { 0x04, "permilps", T_0F3A | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, true, 2 },
1240 { 0x61, "pcmpestri", T_0F3A | T_66, true, 3 },
1241 { 0x60, "pcmpestrm", T_0F3A | T_66, true, 3 },
1242 { 0x63, "pcmpistri", T_0F3A | T_66, true, 3 },
1243 { 0x62, "pcmpistrm", T_0F3A | T_66, true, 3 },
1244 { 0x0E, "testps", T_0F38 | T_66 | T_YMM, false, 2 },
1245 { 0x0F, "testpd", T_0F38 | T_66 | T_YMM, false, 2 },
1246 { 0x2F, "comisd", T_0F | T_66 | T_EVEX | T_EW1 | T_SAE_X | T_N8, false, 2 },
1247 { 0x2F, "comiss", T_0F | T_EVEX | T_EW0 | T_SAE_X | T_N4, false, 2 },
1248 { 0x5B, "cvtdq2ps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_ER_Z, false, 2 },
1249 { 0x5B, "cvtps2dq", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_ER_Z, false, 2 },
1250 { 0x5B, "cvttps2dq", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_SAE_Z, false, 2 },
1251 { 0x28, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false, 2 },
1252 { 0x28, "movaps", T_0F | T_YMM | T_EVEX | T_EW0, false, 2 },
1253 { 0x12, "movddup", T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_DUP, false, 3 },
1254 { 0x6F, "movdqa", T_0F | T_66 | T_YMM, false, 2 },
1255 { 0x6F, "movdqu", T_0F | T_F3 | T_YMM, false, 2 },
1256 { 0x16, "movshdup", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0, false, 3 },
1257 { 0x12, "movsldup", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0, false, 3 },
1258 { 0x10, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false, 2 },
1259 { 0x10, "movups", T_0F | T_YMM | T_EVEX | T_EW0, false, 2 },
1260
1261 { 0x1C, "pabsb", T_0F38 | T_66 | T_YMM | T_EVEX, false, 2 },
1262 { 0x1D, "pabsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, 2 },
1263 { 0x1E, "pabsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, 2 },
1264 { 0x41, "phminposuw", T_0F38 | T_66, false, 3 },
1265
1266 { 0x20, "pmovsxbw", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false, 3 },
1267 { 0x21, "pmovsxbd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 },
1268 { 0x22, "pmovsxbq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N2 | T_N_VL, false, 3 },
1269 { 0x23, "pmovsxwd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false, 3 },
1270 { 0x24, "pmovsxwq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 },
1271 { 0x25, "pmovsxdq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_N8 | T_N_VL, false, 3 },
1272
1273 { 0x30, "pmovzxbw", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false, 3 },
1274 { 0x31, "pmovzxbd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 },
1275 { 0x32, "pmovzxbq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N2 | T_N_VL, false, 3 },
1276 { 0x33, "pmovzxwd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false, 3 },
1277 { 0x34, "pmovzxwq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 },
1278 { 0x35, "pmovzxdq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_N8 | T_N_VL, false, 3 },
1279
1280 { 0x70, "pshufd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, true, 2 },
1281 { 0x70, "pshufhw", T_0F | T_F3 | T_YMM | T_EVEX, true, 2 },
1282 { 0x70, "pshuflw", T_0F | T_F2 | T_YMM | T_EVEX, true, 2 },
1283
1284 { 0x17, "ptest", T_0F38 | T_66 | T_YMM, false, 3 },
1285 { 0x53, "rcpps", T_0F | T_YMM, false, 2 },
1286 { 0x52, "rsqrtps", T_0F | T_YMM, false, 2 },
1287
1288 { 0x51, "sqrtpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_ER_Z | T_B64, false, 2 },
1289 { 0x51, "sqrtps", T_0F | T_YMM | T_EVEX | T_EW0 | T_ER_Z | T_B32, false, 2 },
1290
1291 { 0x2E, "ucomisd", T_0F | T_66 | T_EVEX | T_EW1 | T_SAE_X | T_N8, false, 2 },
1292 { 0x2E, "ucomiss", T_0F | T_EVEX | T_EW0 | T_SAE_X | T_N4, false, 2 },
1293
1294 { 0xCC, "sha1rnds4", T_0F3A, true, 1 },
1295 { 0xC8, "sha1nexte", T_0F38, false, 1 },
1296 { 0xC9, "sha1msg1", T_0F38, false, 1 },
1297 { 0xCA, "sha1msg2", T_0F38, false, 1 },
1298 { 0xCB, "sha256rnds2", T_0F38, false, 1 },
1299 { 0xCC, "sha256msg1", T_0F38, false, 1 },
1300 { 0xCD, "sha256msg2", T_0F38, false, 1 },
1301 };
1302 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1303 const Tbl *p = &tbl[i];
1304 std::string type = type2String(p->type);
1305 if (p->mode & 1) {
1306 const char *immS1 = p->hasIMM ? ", uint8_t imm" : "";
1307 const char *immS2 = p->hasIMM ? ", imm" : ", NONE";
1308 const char *prefTbl[5] = { "NONE", "0x66", "0xF3", "0xF2" };
1309 const char *pref = prefTbl[getPP(p->type)];
1310 const char *suf = p->type & T_0F38 ? "0x38" : p->type & T_0F3A ? "0x3A" : "NONE";
1311 printf("void %s(const Xmm& xmm, const Operand& op%s) { opGen(xmm, op, 0x%02X, %s, isXMM_XMMorMEM%s, %s); }\n", p->name, immS1, p->code, pref, immS2, suf);
1312 }
1313 if (p->mode & 2) {
1314 printf("void v%s(const Xmm& xm, const Operand& op%s) { opAVX_X_XM_IMM(xm, op, %s, 0x%02X%s); }\n"
1315 , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
1316 }
1317 }
1318 }
1319 // (m, x), (m, y)
1320 {
1321 const struct Tbl {
1322 uint8_t code;
1323 const char *name;
1324 int type;
1325 } tbl[] = {
1326 { 0x29, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_M_K },
1327 { 0x29, "movaps", T_0F | T_YMM | T_EVEX | T_EW0 | T_M_K },
1328 { 0x7F, "movdqa", T_0F | T_66 | T_YMM },
1329 { 0x7F, "movdqu", T_0F | T_F3 | T_YMM },
1330 { 0x11, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_M_K },
1331 { 0x11, "movups", T_0F | T_YMM | T_EVEX | T_EW0 | T_M_K },
1332 };
1333 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1334 const Tbl *p = &tbl[i];
1335 std::string type = type2String(p->type);
1336 printf("void v%s(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, %s, 0x%02X); }\n"
1337 , p->name, type.c_str(), p->code);
1338 }
1339 }
1340 // (x, x/m), (y, y/m), (x, x, x/m), (y, y, y/m)
1341 {
1342 const struct Tbl {
1343 uint8_t code;
1344 const char *name;
1345 int type;
1346 int mode; // 1 : sse, 2 : avx, 3 : sse + avx
1347 } tbl[] = {
1348 { 0xD0, "addsubpd", T_0F | T_66 | T_YMM, 3 },
1349 { 0xD0, "addsubps", T_0F | T_F2 | T_YMM, 3 },
1350 { 0x7C, "haddpd", T_0F | T_66 | T_YMM, 3 },
1351 { 0x7C, "haddps", T_0F | T_F2 | T_YMM, 3 },
1352 { 0x7D, "hsubpd", T_0F | T_66 | T_YMM, 3 },
1353 { 0x7D, "hsubps", T_0F | T_F2 | T_YMM, 3 },
1354
1355 { 0xDC, "aesenc", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
1356 { 0xDD, "aesenclast", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
1357 { 0xDE, "aesdec", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
1358 { 0xDF, "aesdeclast", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
1359 };
1360 const uint8_t ppTbl[] = { 0, 0x66, 0xf3, 0xf2 };
1361 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1362 const Tbl *p = &tbl[i];
1363 std::string type = type2String(p->type);
1364 if (p->mode & 1) {
1365 uint8_t pref = ppTbl[getPP(p->type)];
1366 printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x%02X, isXMM_XMMorMEM%s); }\n", p->name, p->code, pref, p->type & T_0F38 ? ", NONE, 0x38" : "");
1367 }
1368 if (p->mode & 2) {
1369 printf("void v%s(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, %s, 0x%02X); }\n"
1370 , p->name, type.c_str(), p->code);
1371 }
1372 }
1373 }
1374 // vmaskmov
1375 {
1376 const char suf[][8] = { "ps", "pd" };
1377 for (int i = 0; i < 2; i++) {
1378 printf("void vmaskmov%s(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x%02X); }\n", suf[i], 0x2C + i);
1379 printf("void vmaskmov%s(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x%02X); }\n", suf[i], 0x2E + i);
1380 }
1381 }
1382 // vpmaskmov
1383 {
1384 const char suf[][8] = { "d", "q" };
1385 for (int i = 0; i < 2; i++) {
1386 printf("void vpmaskmov%s(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W%d | T_YMM, 0x%02X); }\n", suf[i], i, 0x8C);
1387 printf("void vpmaskmov%s(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W%d | T_YMM, 0x%02X); }\n", suf[i], i, 0x8E);
1388 }
1389 }
1390 // vpermd, vpermps
1391 {
1392 const struct Tbl {
1393 uint8_t code;
1394 const char *name;
1395 int type;
1396 } tbl[] = {
1397 { 0x36, "vpermd", T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32 },
1398 { 0x36, "vpermq", T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_B64 },
1399 { 0x16, "vpermps", T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32 },
1400 { 0x16, "vpermpd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_YMM | T_B64 },
1401 };
1402 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1403 const Tbl& p = tbl[i];
1404 std::string type = type2String(p.type);
1405 printf("void %s(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
1406 }
1407 }
1408 // vpermq, vpermpd
1409 {
1410 const struct Tbl {
1411 uint8_t code;
1412 const char *name;
1413 int type;
1414 } tbl[] = {
1415 { 0x00, "vpermq", T_0F3A | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64 },
1416 { 0x01, "vpermpd", T_0F3A | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64 },
1417 };
1418 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1419 const Tbl& p = tbl[i];
1420 std::string type = type2String(p.type);
1421 printf("void %s(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, %s, 0x%02X, imm); }\n", p.name, type.c_str(), p.code);
1422 }
1423 }
1424 // vcmpeqps
1425 {
1426 const char pred[32][16] = {
1427 "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord",
1428 "eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt",
1429 "true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s",
1430 "eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us"
1431 };
1432 const char suf[][4] = { "pd", "ps", "sd", "ss" };
1433 for (int i = 0; i < 4; i++) {
1434 const char *s = suf[i];
1435 for (int j = 0; j < 32; j++) {
1436 if (j < 8) {
1437 printf("void cmp%s%s(const Xmm& x, const Operand& op) { cmp%s(x, op, %d); }\n", pred[j], s, s, j);
1438 }
1439 printf("void vcmp%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmp%s(x1, x2, op, %d); }\n", pred[j], s, s, j);
1440 }
1441 }
1442 }
1443 // vmov(h|l)(pd|ps)
1444 {
1445 const struct Tbl {
1446 bool isH;
1447 bool isPd;
1448 uint8_t code;
1449 } tbl[] = {
1450 { true, true, 0x16 },
1451 { true, false, 0x16 },
1452 { false, true, 0x12 },
1453 { false, false, 0x12 },
1454 };
1455 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1456 const Tbl& p = tbl[i];
1457 char c = p.isH ? 'h' : 'l';
1458 const char *suf = p.isPd ? "pd" : "ps";
1459 const char *type = p.isPd ? "T_0F | T_66 | T_EVEX | T_EW1 | T_N8" : "T_0F | T_EVEX | T_EW0 | T_N8";
1460 printf("void vmov%c%s(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, %s, 0x%02X); }\n"
1461 , c, suf, type, p.code);
1462 printf("void vmov%c%s(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, %s, 0x%02X); }\n"
1463 , c, suf, type, p.code + 1);
1464 }
1465 }
1466 // FMA
1467 {
1468 const struct Tbl {
1469 uint8_t code;
1470 const char *name;
1471 bool supportYMM;
1472 } tbl[] = {
1473 { 0x08, "vfmadd", true },
1474 { 0x09, "vfmadd", false },
1475 { 0x06, "vfmaddsub", true },
1476 { 0x07, "vfmsubadd", true },
1477 { 0x0A, "vfmsub", true },
1478 { 0x0B, "vfmsub", false },
1479 { 0x0C, "vfnmadd", true },
1480 { 0x0D, "vfnmadd", false },
1481 { 0x0E, "vfnmsub", true },
1482 { 0x0F, "vfnmsub", false },
1483 };
1484 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1485 for (int j = 0; j < 2; j++) {
1486 const char sufTbl[][2][8] = {
1487 { "pd", "ps" },
1488 { "sd", "ss" },
1489 };
1490 for (int k = 0; k < 3; k++) {
1491 const struct Ord {
1492 const char *str;
1493 uint8_t code;
1494 } ord[] = {
1495 { "132", 0x90 },
1496 { "213", 0xA0 },
1497 { "231", 0xB0 },
1498 };
1499 int t = T_0F38 | T_66 | T_EVEX;
1500 t |= (j == 0) ? (T_W1 | T_EW1) : (T_W0 | T_EW0);
1501 if (tbl[i].supportYMM) t |= T_YMM;
1502 const std::string suf = sufTbl[tbl[i].supportYMM ? 0 : 1][j];
1503 if (suf == "pd") {
1504 t |= T_B64;
1505 } else if (suf == "ps") {
1506 t |= T_B32;
1507 } else if (suf == "sd") {
1508 t |= T_ER_X | T_N8;
1509 } else { // ss
1510 t |= T_ER_X | T_N4;
1511 }
1512 std::string type = type2String(t);
1513 printf("void %s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n"
1514 , tbl[i].name, ord[k].str, suf.c_str(), type.c_str(), tbl[i].code + ord[k].code);
1515 }
1516 }
1517 }
1518 }
1519 // FMA others
1520 {
1521 printf("void vbroadcastf128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x1A); }\n");
1522 printf("void vbroadcasti128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x5A); }\n");
1523 printf("void vbroadcastsd(const Ymm& y, const Operand& op) { if (!op.isMEM() && !(y.isYMM() && op.isXMM()) && !(y.isZMM() && op.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(y, op, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8, 0x19); }\n");
1524 const struct Tbl {
1525 const char *name;
1526 uint8_t code;
1527 int type;
1528 bool ew1;
1529 } tbl[] = {
1530 { "vbroadcastss", 0x18, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N4 },
1531 { "vpbroadcastb", 0x78, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N1 },
1532 { "vpbroadcastw", 0x79, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N2 },
1533 { "vpbroadcastd", 0x58, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N4 },
1534 { "vpbroadcastq", 0x59, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8 },
1535 };
1536 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1537 const Tbl& p = tbl[i];
1538 std::string type = type2String(p.type);
1539 printf("void %s(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
1540 }
1541
1542 puts("void vextractf128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }");
1543 puts("void vextracti128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); }");
1544 puts("void vextractps(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); }");
1545 puts("void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }");
1546 puts("void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }");
1547 puts("void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }");
1548 puts("void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }");
1549
1550 puts("void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); }");
1551 puts("void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); }");
1552 puts("void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, T_0F, 0xAE); }");
1553 puts("void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); }");
1554
1555 puts("void vpextrb(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); }");
1556 puts("void vpextrw(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }");
1557 puts("void vpextrd(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }");
1558 puts("void vpextrq(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }");
1559
1560 puts("void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); }");
1561 puts("void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }");
1562 puts("void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }");
1563 puts("void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }");
1564
1565 puts("void vpmovmskb(const Reg32e& r, const Xmm& x) { if (!x.is(Operand::XMM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x.isYMM() ? Ymm(r.getIdx()) : Xmm(r.getIdx()), 0, x, T_0F | T_66 | T_YMM, 0xD7); }");
1566
1567 }
1568 // (x, x, imm), (x, imm)
1569 {
1570 const struct Tbl {
1571 const char *name;
1572 uint8_t code;
1573 int idx;
1574 int type;
1575 } tbl[] = {
1576 { "pslldq", 0x73, 7, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
1577 { "psrldq", 0x73, 3, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
1578 { "psllw", 0x71, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
1579 { "pslld", 0x72, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 },
1580 { "psllq", 0x73, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW1 | T_B64 },
1581 { "psraw", 0x71, 4, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
1582 { "psrad", 0x72, 4, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 },
1583 { "psrlw", 0x71, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
1584 { "psrld", 0x72, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 },
1585 { "psrlq", 0x73, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW1 | T_B64 },
1586 };
1587 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1588 const Tbl& p = tbl[i];
1589 std::string type = type2String(p.type);
1590 printf("void v%s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code);
1591 }
1592 }
1593 // 4-op
1594 {
1595 const struct Tbl {
1596 const char *name;
1597 uint8_t code;
1598 } tbl[] = {
1599 { "vblendvpd", 0x4B },
1600 { "vblendvps", 0x4A },
1601 { "vpblendvb", 0x4C },
1602 };
1603 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1604 const Tbl& p = tbl[i];
1605 printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x%02X, x4.getIdx() << 4); }\n", p.name, p.code);
1606 }
1607 }
1608 // mov
1609 {
1610 printf("void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); }\n");
1611 printf("void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); }\n");
1612
1613 printf("void vmovq(const Xmm& x, const Address& addr) { int type, code; if (x.getIdx() < 16) { type = T_0F | T_F3; code = 0x7E; } else { type = T_0F | T_66 | T_EVEX | T_EW1 | T_N8; code = 0x6E; } opAVX_X_X_XM(x, xm0, addr, type, code); }\n");
1614 printf("void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, x.getIdx() < 16 ? 0xD6 : 0x7E); }\n");
1615 printf("void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_F3 | T_EVEX | T_EW1 | T_N8, 0x7E); }\n");
1616
1617 printf("void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x12); }\n");
1618 printf("void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x16); }\n");
1619
1620 printf("void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); }\n");
1621 printf("void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); }\n");
1622
1623 puts("void vmovntdq(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW0, 0xE7); }");
1624 puts("void vmovntpd(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW1, 0x2B); }");
1625 puts("void vmovntps(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_YMM | T_EVEX | T_EW0, 0x2B); }");
1626 puts("void vmovntdqa(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0, 0x2A); }");
1627
1628 // vmovsd, vmovss
1629 for (int i = 0; i < 2; i++) {
1630 char c1 = i == 0 ? 'd' : 's';
1631 int type = T_0F | T_EVEX;
1632 type |= i == 0 ? (T_F2 | T_EW1 | T_N8) : (T_F3 | T_EW0 | T_N4);
1633 std::string s = type2String(type);
1634 printf("void vmovs%c(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, %s, 0x10); }\n", c1, s.c_str());
1635 printf("void vmovs%c(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", c1, s.c_str());
1636 printf("void vmovs%c(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, %s | T_M_K, 0x11); }\n", c1, s.c_str());
1637 }
1638 }
1639 // cvt
1640 {
1641 puts("void vcvtss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_EW0 | T_ER_X | T_N8, 0x2D); }");
1642 puts("void vcvttss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_EW0 | T_SAE_X | T_N8, 0x2C); }");
1643 puts("void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_ER_X, 0x2D); }");
1644 puts("void vcvttsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_SAE_X, 0x2C); }");
1645
1646 puts("void vcvtsi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F3 | T_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); }");
1647 puts("void vcvtsi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F2 | T_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); }");
1648
1649
1650 puts("void vcvtps2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x5A); }");
1651 puts("void vcvtdq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6); }");
1652
1653 puts("void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5A); }");
1654 puts("void vcvtpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6); }");
1655
1656 puts("void vcvttpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_66 | T_0F | T_YMM | T_EVEX |T_EW1 | T_B64 | T_ER_Z, 0xE6); }");
1657
1658 puts("void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }");
1659 puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y | T_M_K, 0x1D, imm); }");
1660
1661 }
1662 // haswell gpr(reg, reg, r/m)
1663 {
1664 const struct Tbl {
1665 const char *name;
1666 int type;
1667 uint8_t code;
1668 } tbl[] = {
1669 { "andn", T_0F38, 0xF2 },
1670 { "mulx", T_F2 | T_0F38, 0xF6 },
1671 { "pdep", T_F2 | T_0F38, 0xF5 },
1672 { "pext", T_F3 | T_0F38, 0xF5 },
1673 };
1674 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1675 const Tbl& p = tbl[i];
1676 printf("void %s(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, %s, 0x%x, true); }\n", p.name, type2String(p.type).c_str(), p.code);
1677 }
1678 }
1679 // gpr(reg, r/m, reg)
1680 {
1681 const struct Tbl {
1682 const char *name;
1683 int type;
1684 uint8_t code;
1685 } tbl[] = {
1686 { "bextr", T_0F38, 0xF7 },
1687 { "bzhi", T_0F38, 0xF5 },
1688 { "sarx", T_0F38 | T_F3, 0xF7 },
1689 { "shlx", T_0F38 | T_66, 0xF7 },
1690 { "shrx", T_0F38 | T_F2, 0xF7 },
1691 };
1692 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1693 const Tbl& p = tbl[i];
1694 printf("void %s(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, %s, 0x%x, false); }\n", p.name, type2String(p.type).c_str(), p.code);
1695 }
1696 puts("void rorx(const Reg32e& r, const Operand& op, uint8_t imm) { opGpr(r, op, Reg32e(0, r.getBit()), T_0F3A | T_F2, 0xF0, false, imm); }");
1697 }
1698 // gpr(reg, r/m)
1699 {
1700 const struct Tbl {
1701 const char *name;
1702 int type;
1703 uint8_t code;
1704 uint8_t idx;
1705 } tbl[] = {
1706 { "blsi", T_0F38, 0xF3, 3 },
1707 { "blsmsk", T_0F38, 0xF3, 2 },
1708 { "blsr", T_0F38, 0xF3, 1 },
1709 };
1710 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1711 const Tbl& p = tbl[i];
1712 printf("void %s(const Reg32e& r, const Operand& op) { opGpr(Reg32e(%d, r.getBit()), op, r, %s, 0x%x, false); }\n", p.name, p.idx, type2String(p.type).c_str(), p.code);
1713 }
1714 }
1715 // gather
1716 {
1717 const int y_vx_y = 0;
1718 const int y_vy_y = 1;
1719 const int x_vy_x = 2;
1720 const struct Tbl {
1721 const char *name;
1722 uint8_t code;
1723 int w;
1724 int mode;
1725 } tbl[] = {
1726 { "vgatherdpd", 0x92, 1, y_vx_y },
1727 { "vgatherqpd", 0x93, 1, y_vy_y },
1728 { "vgatherdps", 0x92, 0, y_vy_y },
1729 { "vgatherqps", 0x93, 0, x_vy_x },
1730 { "vpgatherdd", 0x90, 0, y_vy_y },
1731 { "vpgatherqd", 0x91, 0, x_vy_x },
1732 { "vpgatherdq", 0x90, 1, y_vx_y },
1733 { "vpgatherqq", 0x91, 1, y_vy_y },
1734 };
1735 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1736 const Tbl& p = tbl[i];
1737 printf("void %s(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W%d, 0x%x, %d); }\n", p.name, p.w, p.code, p.mode);
1738 }
1739 }
1740 // vnni
1741 {
1742 const struct Tbl {
1743 uint8_t code;
1744 const char *name;
1745 int type;
1746 } tbl[] = {
1747 { 0x50, "vpdpbusd", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
1748 { 0x51, "vpdpbusds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
1749 { 0x52, "vpdpwssd", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
1750 { 0x53, "vpdpwssds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
1751 };
1752 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1753 const Tbl *p = &tbl[i];
1754 std::string type = type2String(p->type);
1755 printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, %s, 0x%02X, encoding); }\n", p->name, type.c_str(), p->code);
1756 }
1757 }
1758 }
1759
put32()1760 void put32()
1761 {
1762 put_jREGz("cx", true);
1763 put_jREGz("ecx", false);
1764
1765 const GenericTbl tbl[] = {
1766 { "aaa", 0x37 },
1767 { "aad", 0xD5, 0x0A },
1768 { "aam", 0xD4, 0x0A },
1769 { "aas", 0x3F },
1770 { "daa", 0x27 },
1771 { "das", 0x2F },
1772 { "into", 0xCE },
1773 { "popad", 0x61 },
1774 { "popfd", 0x9D },
1775 { "pusha", 0x60 },
1776 { "pushad", 0x60 },
1777 { "pushfd", 0x9C },
1778 { "popa", 0x61 },
1779 };
1780 putGeneric(tbl, NUM_OF_ARRAY(tbl));
1781 putLoadSeg("lds", 0xC5, NONE);
1782 putLoadSeg("les", 0xC4, NONE);
1783 }
1784
put64()1785 void put64()
1786 {
1787 put_jREGz("ecx", true);
1788 put_jREGz("rcx", false);
1789
1790 const GenericTbl tbl[] = {
1791 { "cdqe", 0x48, 0x98 },
1792 { "cqo", 0x48, 0x99 },
1793 { "cmpsq", 0x48, 0xA7 },
1794 { "popfq", 0x9D },
1795 { "pushfq", 0x9C },
1796 { "lodsq", 0x48, 0xAD },
1797 { "movsq", 0x48, 0xA5 },
1798 { "scasq", 0x48, 0xAF },
1799 { "stosq", 0x48, 0xAB },
1800 { "syscall", 0x0F, 0x05 },
1801 { "sysret", 0x0F, 0x07 },
1802 };
1803 putGeneric(tbl, NUM_OF_ARRAY(tbl));
1804
1805 putMemOp("cmpxchg16b", 0, 1, 0x0F, 0xC7, 64);
1806 putMemOp("fxrstor64", 0, 1, 0x0F, 0xAE, 64);
1807 puts("void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }");
1808 puts("void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }");
1809 puts("void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }");
1810 puts("void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, 0x3A); }");
1811 puts("void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, 0x3A); }");
1812
1813 puts("void vcvtss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_ER_X | T_N8, 0x2D); }");
1814 puts("void vcvttss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_SAE_X | T_N8, 0x2C); }");
1815 puts("void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_ER_X, 0x2D); }");
1816 puts("void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_SAE_X, 0x2C); }");
1817
1818 puts("void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }");
1819 puts("void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }");
1820 }
1821
/*
	print the tile mnemonics (ldtilecfg ... tilezero), one definition per line
*/
void putAMX_TILE()
{
	static const char *const tileTbl[] = {
		"void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); }",
		"void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }",
		"void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); }",
		"void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66 | T_0F38 | T_W0, 0x4b); }",
		"void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }",
		"void tilestored(const Address& addr, const Tmm& tm) { opVex(tm, &tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); }",
		"void tilezero(const Tmm& Tmm) { opVex(Tmm, &tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }",
	};
	const char *const *const last = tileTbl + sizeof(tileTbl) / sizeof(tileTbl[0]);
	for (const char *const *cur = tileTbl; cur != last; ++cur) {
		puts(*cur);
	}
}
/*
	print tdpbssd / tdpbsud / tdpbusd / tdpbuud
	all four use code 0x5e and differ only in the prefix part of the type
*/
void putAMX_INT8()
{
	static const char *const int8Tbl[] = {
		"void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); }",
		"void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }",
		"void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }",
		"void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }",
	};
	const size_t num = sizeof(int8Tbl) / sizeof(int8Tbl[0]);
	size_t pos = 0;
	while (pos < num) {
		puts(int8Tbl[pos]);
		pos++;
	}
}
/*
	print tdpbf16ps (the only mnemonic in this group)
*/
void putAMX_BF16()
{
	static const char tdpbf16psDef[] =
		"void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }";
	puts(tdpbf16psDef);
}
1843
putFixed()1844 void putFixed()
1845 {
1846 puts("#ifdef XBYAK64");
1847 put64();
1848 putAMX_TILE();
1849 putAMX_INT8();
1850 putAMX_BF16();
1851 puts("#else");
1852 put32();
1853 puts("#endif");
1854 puts("#ifndef XBYAK_NO_OP_NAMES");
1855 const char *tbl[] = {
1856 "and", "or", "xor",
1857 };
1858 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1859 const char *name = tbl[i];
1860 printf("void %s(const Operand& op1, const Operand& op2) { %s_(op1, op2); }\n", name, name);
1861 printf("void %s(const Operand& op, uint32_t imm) { %s_(op, imm); }\n", name, name);
1862 }
1863 puts("void not(const Operand& op) { not_(op); }");
1864 puts("#endif");
1865 }
1866
putOmit()1867 void putOmit()
1868 {
1869 puts("void vpinsrb(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrb(x, x, op, imm); }");
1870 puts("void vpinsrd(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrd(x, x, op, imm); }");
1871 puts("void vpinsrq(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrq(x, x, op, imm); }");
1872 puts("void vpinsrw(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrw(x, x, op, imm); }");
1873
1874 puts("void vcvtsi2sd(const Xmm& x, const Operand& op) { vcvtsi2sd(x, x, op); }");
1875 puts("void vcvtsi2ss(const Xmm& x, const Operand& op) { vcvtsi2ss(x, x, op); }");
1876 {
1877 const char pred[32][16] = {
1878 "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord",
1879 "eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt",
1880 "true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s",
1881 "eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us"
1882 };
1883 const char suf[][4] = { "pd", "ps", "sd", "ss" };
1884 for (int i = 0; i < 4; i++) {
1885 const char *s = suf[i];
1886 for (int j = 0; j < 32; j++) {
1887 printf("void vcmp%s%s(const Xmm& x, const Operand& op) { vcmp%s%s(x, x, op); }\n", pred[j], s, pred[j], s);
1888 }
1889 }
1890 }
1891 {
1892 const char *tbl[] = {
1893 "pslldq",
1894 "psrldq",
1895 "psllw",
1896 "pslld",
1897 "psllq",
1898 "psraw",
1899 "psrad",
1900 "psrlw",
1901 "psrld",
1902 "psrlq",
1903 };
1904 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1905 const char *name = tbl[i];
1906 printf("void v%s(const Xmm& x, uint8_t imm) { v%s(x, x, imm); }\n", name, name);
1907 }
1908 }
1909 {
1910 const char *tbl[] = {
1911 "vblendvpd",
1912 "vblendvps",
1913 "vpblendvb",
1914 };
1915 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1916 const char *name = tbl[i];
1917 printf("void %s(const Xmm& x1, const Operand& op, const Xmm& x4) { %s(x1, x1, op, x4); }\n", name, name);
1918 }
1919 }
1920 putX_X_XM(true);
1921 }
1922
main(int argc,char * argv[])1923 int main(int argc, char *argv[])
1924 {
1925 std::string mode = argc == 2 ? argv[1] : "";
1926 if (mode == "") {
1927 put();
1928 } else if (mode == "fixed") {
1929 putFixed();
1930 } else {
1931 putOmit();
1932 }
1933 }
1934