1 #include <stdio.h>
2 #define XBYAK_NO_OP_NAMES
3 #include "xbyak/xbyak.h"
4 #include "xbyak/xbyak_bin2hex.h"
5 #include <stdlib.h>
6 #include <string.h>
7 #include "cybozu/inttype.hpp"
8 #define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
9
10 using namespace Xbyak;
11
12 const int bitEnd = 64;
13
14 const uint64 MMX = 1ULL << 0;
15 const uint64 _XMM = 1ULL << 1;
16 const uint64 _MEM = 1ULL << 2;
17 const uint64 _REG32 = 1ULL << 3;
18 const uint64 EAX = 1ULL << 4;
19 const uint64 IMM32 = 1ULL << 5;
20 const uint64 IMM8 = 1ULL << 6;
21 const uint64 _REG8 = 1ULL << 7;
22 const uint64 _REG16 = 1ULL << 8;
23 const uint64 NEG8 = 1ULL << 9;
24 const uint64 IMM16 = 1ULL << 10;
25 const uint64 NEG16 = 1ULL << 11;
26 const uint64 AX = 1ULL << 12;
27 const uint64 AL = 1ULL << 13;
28 const uint64 IMM_1 = 1ULL << 14;
29 const uint64 MEM8 = 1ULL << 15;
30 const uint64 MEM16 = 1ULL << 16;
31 const uint64 MEM32 = 1ULL << 17;
32 const uint64 ONE = 1ULL << 19;
33 const uint64 CL = 1ULL << 20;
34 const uint64 MEM_ONLY_DISP = 1ULL << 21;
35 const uint64 NEG32 = 1ULL << 23;
36 const uint64 _YMM = 1ULL << 24;
37 const uint64 VM32X_32 = 1ULL << 39;
38 const uint64 VM32X_64 = 1ULL << 40;
39 const uint64 VM32Y_32 = 1ULL << 41;
40 const uint64 VM32Y_64 = 1ULL << 42;
41 #ifdef XBYAK64
42 const uint64 _MEMe = 1ULL << 25;
43 const uint64 REG32_2 = 1ULL << 26; // r8d, ...
44 const uint64 REG16_2 = 1ULL << 27; // r8w, ...
45 const uint64 REG8_2 = 1ULL << 28; // r8b, ...
46 const uint64 REG8_3 = 1ULL << 29; // spl, ...
47 const uint64 _REG64 = 1ULL << 30; // rax, ...
48 const uint64 _REG64_2 = 1ULL << 31; // r8, ...
49 const uint64 RAX = 1ULL << 32;
50 const uint64 _XMM2 = 1ULL << 33;
51 const uint64 _YMM2 = 1ULL << 34;
52 const uint64 VM32X = VM32X_32 | VM32X_64;
53 const uint64 VM32Y = VM32Y_32 | VM32Y_64;
54 #else
55 const uint64 _MEMe = 0;
56 const uint64 REG32_2 = 0;
57 const uint64 REG16_2 = 0;
58 const uint64 REG8_2 = 0;
59 const uint64 REG8_3 = 0;
60 const uint64 _REG64 = 0;
61 const uint64 _REG64_2 = 0;
62 const uint64 RAX = 0;
63 const uint64 _XMM2 = 0;
64 const uint64 _YMM2 = 0;
65 const uint64 VM32X = VM32X_32;
66 const uint64 VM32Y = VM32Y_32;
67 #endif
68 const uint64 REG64 = _REG64 | _REG64_2 | RAX;
69 const uint64 REG32 = _REG32 | REG32_2 | EAX;
70 const uint64 REG16 = _REG16 | REG16_2 | AX;
71 const uint64 REG32e = REG32 | REG64;
72 const uint64 REG8 = _REG8 | REG8_2|AL;
73 const uint64 MEM = _MEM | _MEMe;
74 const uint64 MEM64 = 1ULL << 35;
75 const uint64 ST0 = 1ULL << 36;
76 const uint64 STi = 1ULL << 37;
77 const uint64 IMM_2 = 1ULL << 38;
78 const uint64 IMM = IMM_1 | IMM_2;
79 const uint64 XMM = _XMM | _XMM2;
80 const uint64 YMM = _YMM | _YMM2;
81 const uint64 K = 1ULL << 43;
82 const uint64 _ZMM = 1ULL << 44;
83 const uint64 _ZMM2 = 1ULL << 45;
84 #ifdef XBYAK64
85 const uint64 ZMM = _ZMM | _ZMM2;
86 const uint64 _YMM3 = 1ULL << 46;
87 #else
88 const uint64 ZMM = _ZMM;
89 const uint64 _YMM3 = 0;
90 #endif
91 const uint64 K2 = 1ULL << 47;
92 const uint64 ZMM_SAE = 1ULL << 48;
93 const uint64 ZMM_ER = 1ULL << 49;
94 #ifdef XBYAK64
95 const uint64 _XMM3 = 1ULL << 50;
96 #endif
97 const uint64 XMM_SAE = 1ULL << 51;
98 #ifdef XBYAK64
99 const uint64 XMM_KZ = 1ULL << 52;
100 const uint64 YMM_KZ = 1ULL << 53;
101 const uint64 ZMM_KZ = 1ULL << 54;
102 #else
103 const uint64 XMM_KZ = 0;
104 const uint64 YMM_KZ = 0;
105 const uint64 ZMM_KZ = 0;
106 #endif
107 const uint64 MEM_K = 1ULL << 55;
108 const uint64 M_1to2 = 1ULL << 56;
109 const uint64 M_1to4 = 1ULL << 57;
110 const uint64 M_1to8 = 1ULL << 58;
111 const uint64 M_1to16 = 1ULL << 59;
112 const uint64 XMM_ER = 1ULL << 60;
113 const uint64 M_xword = 1ULL << 61;
114 const uint64 M_yword = 1ULL << 62;
115 const uint64 MY_1to4 = 1ULL << 18;
116 const uint64 BNDREG = 1ULL << 22;
117
118 const uint64 NOPARA = 1ULL << (bitEnd - 1);
119
120 class Test {
121 Test(const Test&);
122 void operator=(const Test&);
123 const bool isXbyak_;
124 int funcNum_;
125 /*
126 and_, or_, xor_, not_ => and, or, xor, not
127 */
removeUnderScore(std::string s) const128 std::string removeUnderScore(std::string s) const
129 {
130 if (!isXbyak_ && s[s.size() - 1] == '_') s.resize(s.size() - 1);
131 return s;
132 }
133
134 // check all op1, op2, op3
put(const std::string & nm,uint64 op1=NOPARA,uint64 op2=NOPARA,uint64 op3=NOPARA,uint64 op4=NOPARA) const135 void put(const std::string& nm, uint64 op1 = NOPARA, uint64 op2 = NOPARA, uint64 op3 = NOPARA, uint64 op4 = NOPARA) const
136 {
137 for (int i = 0; i < bitEnd; i++) {
138 if ((op1 & (1ULL << i)) == 0) continue;
139 for (int j = 0; j < bitEnd; j++) {
140 if ((op2 & (1ULL << j)) == 0) continue;
141 for (int k = 0; k < bitEnd; k++) {
142 if ((op3 & (1ULL << k)) == 0) continue;
143 for (int s = 0; s < bitEnd; s++) {
144 if ((op4 & (1ULL << s)) == 0) continue;
145 printf("%s ", nm.c_str());
146 if (isXbyak_) printf("(");
147 if (!(op1 & NOPARA)) printf("%s", get(1ULL << i));
148 if (!(op2 & NOPARA)) printf(", %s", get(1ULL << j));
149 if (!(op3 & NOPARA)) printf(", %s", get(1ULL << k));
150 if (!(op4 & NOPARA)) printf(", %s", get(1ULL << s));
151 if (isXbyak_) printf("); dump();");
152 printf("\n");
153 }
154 }
155 }
156 }
157 }
put(const char * nm,uint64 op,const char * xbyak,const char * nasm) const158 void put(const char *nm, uint64 op, const char *xbyak, const char *nasm) const
159 {
160 for (int i = 0; i < bitEnd; i++) {
161 if ((op & (1ULL << i)) == 0) continue;
162 printf("%s ", nm);
163 if (isXbyak_) printf("(");
164 if (!(op & NOPARA)) printf("%s", get(1ULL << i));
165 printf(", %s", isXbyak_ ? xbyak : nasm);
166 if (isXbyak_) printf("); dump();");
167 printf("\n");
168 }
169 }
put(const char * nm,const char * xbyak,const char * nasm=0,uint64 op=NOPARA) const170 void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64 op = NOPARA) const
171 {
172 if (nasm == 0) nasm = xbyak;
173 for (int i = 0; i < bitEnd; i++) {
174 if ((op & (1ULL << i)) == 0) continue;
175 printf("%s ", nm);
176 if (isXbyak_) printf("(");
177 printf("%s ", isXbyak_ ? xbyak : nasm);
178 if (!(op & NOPARA)) printf(", %s", get(1ULL << i));
179 if (isXbyak_) printf("); dump();");
180 printf("\n");
181 }
182 }
get(uint64 type) const183 const char *get(uint64 type) const
184 {
185 int idx = (rand() / 31) & 7;
186 if (type == ST0) {
187 return "st0";
188 }
189 if (type == STi) {
190 return "st2";
191 }
192 switch (type) {
193 case MMX:
194 {
195 static const char MmxTbl[][4] = {
196 "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
197 };
198 return MmxTbl[idx];
199 }
200 case _XMM:
201 {
202 static const char tbl[][6] = {
203 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
204 };
205 return tbl[idx];
206 }
207 case _YMM:
208 {
209 static const char tbl[][6] = {
210 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7"
211 };
212 return tbl[idx];
213 }
214 case _ZMM:
215 {
216 static const char tbl[][6] = {
217 "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7"
218 };
219 return tbl[idx];
220 }
221 #ifdef XBYAK64
222 case _XMM2:
223 {
224 static const char tbl[][6] = {
225 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
226 };
227 return tbl[idx];
228 }
229 case _XMM3:
230 {
231 static const char tbl[][6] = {
232 "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23"
233 };
234 return tbl[idx];
235 }
236 case _YMM2:
237 {
238 static const char tbl[][6] = {
239 "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15",
240 };
241 return tbl[idx];
242 }
243 case _YMM3:
244 {
245 static const char tbl[][6] = {
246 "ymm16", "ymm17", "ymm18", "ymm19", "ymm20", "ymm21", "ymm22", "ymm23",
247 };
248 return tbl[idx];
249 }
250 case _ZMM2:
251 {
252 static const char tbl[][6] = {
253 "zmm8", "zmm9", "zmm10", "zmm11", "zmm28", "zmm29", "zmm30", "zmm31",
254 };
255 return tbl[idx];
256 }
257 #endif
258 case _MEM:
259 {
260 return isXbyak_ ? "ptr[eax+ecx+3]" : "[eax+ecx+3]"; // QQQ : disp8N
261 /*
262 idx %= 5;
263 switch (idx) {
264 case 0: return isXbyak_ ? "ptr[eax+ecx]" : "[eax+ecx]";
265 case 1: return isXbyak_ ? "ptr[eax+ecx+1]" : "[eax+ecx+1]";
266 case 2: return isXbyak_ ? "ptr[eax+ecx+16]" : "[eax+ecx+16]";
267 case 3: return isXbyak_ ? "ptr[eax+ecx+32]" : "[eax+ecx+32]";
268 case 4: return isXbyak_ ? "ptr[eax+ecx+48]" : "[eax+ecx+48]";
269 }
270 */
271 }
272 case _MEMe:
273 {
274 static int ccc = 1;
275 #ifdef USE_YASM
276 ccc++;
277 #endif
278 if (ccc & 1) {
279 return isXbyak_ ? "ptr[rdx+r15+0x12]" : "[rdx+r15+0x12]";
280 } else {
281 return isXbyak_ ? "ptr[rip - 0x13456+1-3]" : "[rip - 0x13456+1-3]";
282 }
283 }
284 case MEM8:
285 return "byte [eax+edx]";
286 case MEM16:
287 return "word [esi]";
288 case MEM32:
289 return "dword [ebp*2]";
290 case MEM64:
291 return "qword [eax+ecx*8]";
292 case MEM_ONLY_DISP:
293 return isXbyak_ ? "ptr[(void*)0x123]" : "[0x123]";
294 case _REG16: // not ax
295 {
296 static const char Reg16Tbl[][4] = {
297 "ax", "cx", "dx", "bx", "sp", "bp", "si", "di"
298 };
299 return Reg16Tbl[(idx % 7) + 1];
300 }
301 case _REG8: // not al
302 {
303 static const char Reg8Tbl[][4] = {
304 #ifdef XBYAK64 // QQQ
305 "al", "cl", "dl", "bl", "al", "cl", "dl", "bl"
306 #else
307 "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"
308 #endif
309 };
310 return Reg8Tbl[(idx % 7) + 1];
311 }
312 case _REG32: // not eax
313 {
314 static const char Reg32Tbl[][4] = {
315 "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"
316 };
317 return Reg32Tbl[(idx % 7) + 1];
318 }
319 #ifdef XBYAK64
320 case _REG64: // not rax
321 {
322 static const char Reg64Tbl[][4] = {
323 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"
324 };
325 return Reg64Tbl[(idx % 7) + 1];
326 }
327 case _REG64_2:
328 {
329 static const char Reg64_2Tbl[][4] = {
330 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
331 };
332 return Reg64_2Tbl[idx];
333 }
334 case REG32_2:
335 {
336 static const char Reg32eTbl[][5] = {
337 "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d"
338 };
339 return Reg32eTbl[idx];
340 }
341 case REG16_2:
342 {
343 static const char Reg16eTbl[][5] = {
344 "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w"
345 };
346 return Reg16eTbl[idx];
347 }
348 case REG8_2:
349 {
350 static const char Reg8_2Tbl[][5] = {
351 "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b"
352 };
353 return Reg8_2Tbl[idx];
354 }
355 case REG8_3:
356 {
357 static const char Reg8_3Tbl[][5] = {
358 "spl", "bpl", "sil", "dil", "spl", "bpl", "sil", "dil"
359 };
360 return Reg8_3Tbl[idx];
361 }
362 case RAX:
363 return "rax";
364 #endif
365 case EAX:
366 return "eax";
367 case AX:
368 return "ax";
369 case AL:
370 return "al";
371 case CL:
372 return "cl";
373 case ONE:
374 return "1";
375 case IMM32:
376 return isXbyak_ ? "12345678" : "dword 12345678";
377 case IMM16:
378 return isXbyak_ ? "1000" : "word 1000";
379 case IMM8:
380 return isXbyak_ ? "4" : "byte 4";
381 case NEG8:
382 return isXbyak_ ? "-30" : "byte -30";
383 case NEG16:
384 return isXbyak_ ? "-1000" : "word -1000";
385 case NEG32:
386 return isXbyak_ ? "-100000" : "dword -100000";
387 case IMM_1:
388 return "4";
389 case IMM_2:
390 return isXbyak_ ? "0xda" : "0xda";
391 case VM32X_32:
392 return isXbyak_ ? "ptr [ebp+4+xmm1*8]" : "[ebp+4+xmm1*8]";
393 case VM32X_64:
394 return isXbyak_ ? "ptr [12345+xmm13*2]" : "[12345+xmm13*2]";
395 case VM32Y_32:
396 return isXbyak_ ? "ptr [ymm4]" : "[ymm4]";
397 case VM32Y_64:
398 return isXbyak_ ? "ptr [12345+ymm13*2+r13]" : "[12345+ymm13*2+r13]";
399 case M_1to2: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to2}";
400 case M_1to4: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to4}";
401 case M_1to8: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to8}";
402 case M_1to16: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to16}";
403
404 case M_xword: return isXbyak_ ? "ptr [eax+33]" : "oword [eax+33]";
405 case M_yword: return isXbyak_ ? "yword [eax+33]" : "yword [eax+33]";
406 case MY_1to4: return isXbyak_ ? "yword_b [eax+32]" : "[eax+32]{1to4}";
407 case K:
408 {
409 static const char kTbl[][5] = {
410 "k1", "k2", "k3", "k4", "k5", "k6", "k7",
411 };
412 return kTbl[idx % 7];
413 }
414 case K2:
415 return isXbyak_ ? "k3 | k5" : "k3{k5}";
416 case BNDREG:
417 {
418 static const char tbl[][5] = {
419 "bnd0", "bnd1", "bnd2", "bnd3",
420 };
421 return tbl[idx % 4];
422 }
423 #ifdef XBYAK64
424 case XMM_SAE:
425 return isXbyak_ ? "xmm25 | T_sae" : "xmm25, {sae}";
426 case ZMM_SAE:
427 return isXbyak_ ? "zmm25 | T_sae" : "zmm25, {sae}";
428 case XMM_ER:
429 return isXbyak_ ? "xmm4 | T_rd_sae" : "xmm4, {rd-sae}";
430 case ZMM_ER:
431 return isXbyak_ ? "zmm20 | T_rd_sae" : "zmm20, {rd-sae}";
432 case XMM_KZ:
433 return isXbyak_ ? "xmm5 | k5" : "xmm5{k5}";
434 case YMM_KZ:
435 return isXbyak_ ? "ymm2 |k3|T_z" : "ymm2{k3}{z}";
436 case ZMM_KZ:
437 return isXbyak_ ? "zmm7|k1" : "zmm7{k1}";
438 case MEM_K:
439 return isXbyak_ ? "ptr [rax] | k1" : "[rax]{k1}";
440 #else
441 case XMM_SAE:
442 return isXbyak_ ? "xmm5 | T_sae" : "xmm5, {sae}";
443 case ZMM_SAE:
444 return isXbyak_ ? "zmm5 | T_sae" : "zmm5, {sae}";
445 case XMM_ER:
446 return isXbyak_ ? "xmm30 | T_rd_sae" : "xmm30, {rd-sae}";
447 case ZMM_ER:
448 return isXbyak_ ? "zmm2 | T_rd_sae" : "zmm2, {rd-sae}";
449 case MEM_K:
450 return isXbyak_ ? "ptr [eax] | k1" : "[eax]{k1}";
451 #endif
452 }
453 return 0;
454 }
putSIMPLE() const455 void putSIMPLE() const
456 {
457 const char tbl[][20] = {
458 #ifdef XBYAK64
459 "cdqe",
460 "cqo",
461 "cmpsq",
462 "movsq",
463 "scasq",
464 "stosq",
465 #else
466 "aaa",
467 "aad",
468 "aam",
469 "aas",
470 "daa",
471 "das",
472 "popad",
473 "popfd",
474 "pusha",
475 "pushad",
476 "pushfd",
477 "popa",
478 #endif
479
480 "cbw",
481 "cdq",
482 "clc",
483 "cld",
484 "cli",
485 "cmc",
486
487 "cpuid",
488 "cwd",
489 "cwde",
490
491 "lahf",
492 // "lock",
493 "cmpsb",
494 "cmpsw",
495 "cmpsd",
496 "movsb",
497 "movsw",
498 "movsd",
499 "scasb",
500 "scasw",
501 "scasd",
502 "stosb",
503 "stosw",
504 "stosd",
505 "nop",
506
507 "sahf",
508 "stc",
509 "std",
510 "sti",
511
512 "emms",
513 "pause",
514 "sfence",
515 "lfence",
516 "mfence",
517 "monitor",
518 "mwait",
519
520 "rdmsr",
521 "rdpmc",
522 "rdtsc",
523 "rdtscp",
524 "ud2",
525 "wait",
526 "fwait",
527 "wbinvd",
528 "wrmsr",
529 "xlatb",
530
531 "popf",
532 "pushf",
533 "stac",
534
535 "xgetbv",
536 "vzeroall",
537 "vzeroupper",
538
539 "f2xm1",
540 "fabs",
541 "faddp",
542 "fchs",
543 "fcom",
544 "fcomp",
545 "fcompp",
546 "fcos",
547 "fdecstp",
548 "fdivp",
549 "fdivrp",
550 "fincstp",
551 "finit",
552 "fninit",
553 "fld1",
554 "fldl2t",
555 "fldl2e",
556 "fldpi",
557 "fldlg2",
558 "fldln2",
559 "fldz",
560 "fmulp",
561 "fnop",
562 "fpatan",
563 "fprem",
564 "fprem1",
565 "fptan",
566 "frndint",
567 "fscale",
568 "fsin",
569 "fsincos",
570 "fsqrt",
571 "fsubp",
572 "fsubrp",
573 "ftst",
574 "fucom",
575 "fucomp",
576 "fucompp",
577 "fxam",
578 "fxch",
579 "fxtract",
580 "fyl2x",
581 "fyl2xp1",
582 };
583 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
584 put(tbl[i]);
585 }
586
587 put("bswap", REG32e);
588 put("lea", REG32e|REG16, MEM);
589 put("fldcw", MEM);
590 put("fstcw", MEM);
591 }
putJmp() const592 void putJmp() const
593 {
594 #ifdef XBYAK64
595 put("jmp", REG64);
596 put("call", REG64);
597 #else
598 put("jmp", REG32);
599 put("call", REG16|REG32);
600 #endif
601 put("jmp", MEM);
602 put("jmp", MEM);
603 put("jmp", MEM);
604 put("call", MEM|MEM_ONLY_DISP);
605 #ifndef USE_YASM
606 // call(ptr [getCode() + 5]); means to construct the opecode of "call"
607 // after calling getCode().
608 // Its behavior is same as NASM(MASM). YASM makes different opecode.
609 put("call", "getCode() + 5", "$ + 5");
610 #endif
611
612 #ifdef XBYAK64
613 put("jmp", "ptr[(void*)0x12345678]", "[0x12345678]");
614 put("call", "ptr[(void*)0x12345678]", "[0x12345678]");
615 #ifdef USE_YASM
616 put("jmp", "ptr[rip + 0x12345678]", "[rip+0x12345678]");
617 put("call", "ptr[rip + 0x12345678]", "[rip+0x12345678]");
618 put("call", "ptr[rip -23]", "[rip-23]");
619 put("call", "ptr[rip -23+56]", "[rip-23+56]");
620 #else
621 // bug of yasm?
622 if (isXbyak_) {
623 puts("{ Label label0;");
624 puts("L(label0);");
625 puts("pshufb (xmm14, ptr [rip+label0]); dump();");
626 puts("}");
627 } else {
628 puts("label0:");
629 puts("pshufb xmm14, [rel label0]");
630 }
631 #endif
632 #endif
633 }
putMMX1() const634 void putMMX1() const
635 {
636 // emms etc
637 put("ldmxcsr", MEM);
638 put("movmskps", REG32e, XMM);
639 put("movmskpd", REG32e, XMM);
640 put("stmxcsr", MEM);
641 put("maskmovq", MMX, MMX);
642 put("movntps", MEM, XMM);
643 put("movntq", MEM, MMX);
644 put("prefetcht0", MEM);
645 put("prefetcht1", MEM);
646 put("prefetcht2", MEM);
647 put("prefetchnta", MEM);
648 put("prefetchwt1", MEM);
649 put("prefetchw", MEM);
650
651 // SSE2 misc
652 put("maskmovdqu", XMM, XMM);
653 put("movntpd", MEM, XMM);
654 put("movntdq", MEM, XMM);
655 put("movnti", MEM, REG32); // QQQ:REG32e?
656
657 put("movhlps", XMM, XMM);
658 put("movlhps", XMM, XMM);
659
660 // movd for MMX, XMM
661 put("movd", MEM|MEM32|REG32, MMX|XMM);
662 put("movd", MMX|XMM, MEM|REG32|MEM32);
663
664 // movq for MMX
665 put("movq", MMX, MMX|MEM);
666 put("movq", MEM, MMX);
667 // movq for XMM
668 put("movq", XMM, XMM|MEM);
669 put("movq", MEM, XMM);
670 put("movq", XMM|MMX, "qword[eax]", "qword[eax]");
671 put("movq", XMM|MMX, "ptr[eax]", "qword[eax]");
672 put("movq", "qword[eax]", "qword[eax]", XMM|MMX);
673 put("movq", "ptr[eax]", "qword[eax]", XMM|MMX);
674 #ifdef XBYAK64
675 put("movq", REG64, XMM|MMX);
676 put("movq", XMM|MMX, REG64);
677 #endif
678
679 // SSE3 int
680 put("lddqu", XMM, MEM);
681 }
putMMX2() const682 void putMMX2() const
683 {
684 static const char nmTbl[][16] = {
685 // MMX
686 "packssdw",
687 "packsswb",
688 "packuswb",
689 "pand",
690 "pandn",
691 "pmaddwd",
692 "pmulhuw",
693 "pmulhw",
694 "pmullw",
695 "por",
696 "punpckhbw",
697 "punpckhwd",
698 "punpckhdq",
699 "punpcklbw",
700 "punpcklwd",
701 "punpckldq",
702 "pxor",
703 "paddb",
704 "paddw",
705 "paddd",
706 "paddsb",
707 "paddsw",
708 "paddusb",
709 "paddusw",
710 "pcmpeqb",
711 "pcmpeqw",
712 "pcmpeqd",
713 "pcmpgtb",
714 "pcmpgtw",
715 "pcmpgtd",
716 "psllw",
717 "pslld",
718 "psllq",
719 "psraw",
720 "psrad",
721 "psrlw",
722 "psrld",
723 "psrlq",
724 "psubb",
725 "psubw",
726 "psubd",
727 "psubsb",
728 "psubsw",
729 "psubusb",
730 "psubusw",
731 // MMX2
732 "pavgb",
733 "pavgw",
734 "pmaxsw",
735 "pmaxub",
736 "pminsw",
737 "pminub",
738 "psadbw",
739 //
740 "paddq",
741 "pmuludq",
742 "psubq",
743 };
744 for (size_t i = 0; i < NUM_OF_ARRAY(nmTbl); i++) {
745 put(nmTbl[i], MMX, MMX|MEM);
746 put(nmTbl[i], XMM, XMM|MEM);
747 }
748 }
putMMX3() const749 void putMMX3() const
750 {
751 static const char nmTbl[][16] = {
752 "psllw",
753 "pslld",
754 "psllq",
755 "psraw",
756 "psrad",
757 "psrlw",
758 "psrld",
759 "psrlq",
760 };
761 for (size_t i = 0; i < NUM_OF_ARRAY(nmTbl); i++) {
762 put(nmTbl[i], MMX|XMM, IMM);
763 }
764 put("pslldq", XMM, IMM);
765 put("psrldq", XMM, IMM);
766 put("pmovmskb", REG32, MMX|XMM); // QQQ
767 put("pextrw", REG32, MMX|XMM, IMM); // QQQ
768 put("pinsrw", MMX|XMM, REG32|MEM, IMM); // QQQ
769 }
putMMX4() const770 void putMMX4() const
771 {
772 put("pshufw", MMX, MMX|MEM, IMM);
773 put("pshuflw", XMM, XMM|MEM, IMM);
774 put("pshufhw", XMM, XMM|MEM, IMM);
775 put("pshufd", XMM, XMM|MEM, IMM);
776 }
putMMX5() const777 void putMMX5() const
778 {
779 static const char nmTbl[][16] = {
780 "movdqa",
781 "movdqu",
782 "movaps",
783 "movss",
784 "movups",
785 "movapd",
786 "movsd",
787 "movupd",
788 };
789 for (size_t i = 0; i < NUM_OF_ARRAY(nmTbl); i++) {
790 put(nmTbl[i], XMM, XMM|MEM);
791 put(nmTbl[i], MEM, XMM);
792 }
793 put("movq2dq", XMM, MMX);
794 put("movdq2q", MMX, XMM);
795 }
796
putXMM1() const797 void putXMM1() const
798 {
799 enum {
800 PS = 1 << 0,
801 SS = 1 << 1,
802 PD = 1 << 2,
803 SD = 1 << 3
804 };
805 const struct {
806 uint8 code;
807 const char *name;
808 } sufTbl[] = {
809 { 0, "ps" },
810 { 0xF3, "ss" },
811 { 0x66, "pd" },
812 { 0xF2, "sd" },
813 };
814 static const struct XmmTbl1 {
815 uint8 code;
816 int mode;
817 const char *name;
818 bool hasImm;
819 } xmmTbl1[] = {
820 { B01011000, PS|SS|PD|SD, "add", false },
821 { B01010101, PS|PD , "andn", false },
822 { B01010100, PS|PD , "and", false },
823 { B11000010, PS|SS|PD|SD, "cmp", true },
824 { B01011110, PS|SS|PD|SD, "div", false },
825 { B01011111, PS|SS|PD|SD, "max", false },
826 { B01011101, PS|SS|PD|SD, "min", false },
827 { B01011001, PS|SS|PD|SD, "mul", false },
828 { B01010110, PS|PD , "or", false },
829 { B01010011, PS|SS , "rcp", false },
830 { B01010010, PS|SS , "rsqrt", false },
831 { B11000110, PS|PD , "shuf", true },
832 { B01010001, PS|SS|PD|SD, "sqrt", false },
833 { B01011100, PS|SS|PD|SD, "sub", false },
834 { B00010101, PS|PD , "unpckh", false },
835 { B00010100, PS|PD , "unpckl", false },
836 { B01010111, PS|PD , "xor", false },
837 //
838 };
839 for (size_t i = 0; i < NUM_OF_ARRAY(xmmTbl1); i++) {
840 const XmmTbl1 *p = &xmmTbl1[i];
841 for (size_t j = 0; j < NUM_OF_ARRAY(sufTbl); j++) {
842 if (!(p->mode & (1 << j))) continue;
843 char buf[16];
844 sprintf(buf, "%s%s", p->name, sufTbl[j].name);
845 if (p->hasImm) {
846 put(buf, XMM, XMM|MEM, IMM);
847 } else {
848 put(buf, XMM, XMM|MEM);
849 }
850 }
851 }
852 }
putXMM2() const853 void putXMM2() const
854 {
855 // (XMM, XMM|MEM)
856 static const char tbl[][16] = {
857 "punpckhqdq",
858 "punpcklqdq",
859
860 "comiss",
861 "ucomiss",
862 "comisd",
863 "ucomisd",
864
865 "cvtpd2ps",
866 "cvtps2pd",
867 "cvtsd2ss",
868 "cvtss2sd",
869 "cvtpd2dq",
870 "cvttpd2dq",
871 "cvtdq2pd",
872 "cvtps2dq",
873 "cvttps2dq",
874 "cvtdq2ps",
875
876 "addsubpd",
877 "addsubps",
878 "haddpd",
879 "haddps",
880 "hsubpd",
881 "hsubps",
882 "movddup",
883 "movshdup",
884 "movsldup",
885 };
886 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
887 put(tbl[i], XMM, XMM|MEM);
888 }
889 }
putXMM3() const890 void putXMM3() const
891 {
892 static const struct Tbl {
893 const char *name;
894 uint64 op1;
895 uint64 op2;
896 } tbl[] = {
897 { "cvtpi2ps", XMM, MMX|MEM },
898 { "cvtps2pi", MMX, XMM|MEM },
899 { "cvtsi2ss", XMM, REG32|MEM },
900 { "cvtss2si", REG32, XMM|MEM },
901 { "cvttps2pi", MMX, XMM|MEM },
902 { "cvttss2si", REG32, XMM|MEM },
903 { "cvtpi2pd", XMM, MMX|MEM },
904 { "cvtpd2pi", MMX, XMM|MEM },
905 { "cvtsi2sd", XMM, REG32|MEM },
906 { "cvtsd2si", REG32, XMM|MEM },
907 { "cvttpd2pi", MMX, XMM|MEM },
908 { "cvttsd2si", REG32, XMM|MEM },
909 };
910 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
911 const Tbl *p = &tbl[i];
912 put(p->name, p->op1, p->op2);
913 }
914 }
putXMM4() const915 void putXMM4() const
916 {
917 static const char tbl[][16] = {
918 "movhps",
919 "movlps",
920 "movhpd",
921 "movlpd",
922 };
923 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
924 const char *p = tbl[i];
925 put(p, XMM, MEM);
926 put(p, MEM, XMM);
927 }
928 }
putCmov() const929 void putCmov() const
930 {
931 const char tbl[][4] = {
932 "o",
933 "no",
934 "b",
935 "c",
936 "nae",
937 "nb",
938 "nc",
939 "ae",
940 "e",
941 "z",
942 "ne",
943 "nz",
944 "be",
945 "na",
946 "nbe",
947 "a",
948 "s",
949 "ns",
950 "p",
951 "pe",
952 "np",
953 "po",
954 "l",
955 "nge",
956 "nl",
957 "ge",
958 "le",
959 "ng",
960 "nle",
961 "g",
962 };
963 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
964 char buf[16];
965 sprintf(buf, "cmov%s", tbl[i]);
966 put(buf, REG16, REG16|MEM);
967 put(buf, REG32, REG32|MEM);
968 put(buf, REG64, REG64|MEM);
969 sprintf(buf, "set%s", tbl[i]);
970 put(buf, REG8|REG8_3|MEM);
971 }
972 }
putReg1() const973 void putReg1() const
974 {
975 // (REG, REG|MEM)
976 {
977 static const char tbl[][16] = {
978 "adc",
979 "add",
980 "and_",
981 "cmp",
982 "or_",
983 "sbb",
984 "sub",
985 "xor_",
986 };
987 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
988 const std::string s = removeUnderScore(tbl[i]);
989 const char *p = s.c_str();
990 put(p, REG32, REG32|MEM);
991 put(p, REG64, REG64|MEM);
992 put(p, REG16, REG16|MEM);
993 put(p, REG8|REG8_3, REG8|MEM);
994 put(p, MEM, REG32e|REG16|REG8|REG8_3);
995
996 put(p, MEM8, IMM8|NEG8);
997 put(p, MEM16, IMM8|IMM16|NEG8|NEG16);
998 put(p, MEM32, IMM8|IMM32|NEG8|NEG32);
999
1000 put(p, REG64|RAX, IMM8|NEG8);
1001 put(p, REG64|RAX, "0x12345678", "0x12345678");
1002 put(p, REG64|RAX, "192", "192");
1003 put(p, REG64|RAX, "0x1234", "0x1234");
1004 put(p, REG32|EAX, IMM8|IMM32|NEG8);
1005 put(p, REG16|AX, IMM8|IMM16|NEG8|NEG16);
1006 put(p, REG8|REG8_3|AL, IMM|NEG8);
1007 }
1008 }
1009 {
1010 const char tbl[][8] = {
1011 "adcx",
1012 "adox",
1013 };
1014 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1015 const char *p = tbl[i];
1016 put(p, REG32, REG32|MEM);
1017 put(p, REG64, REG64|MEM);
1018 }
1019 }
1020 }
putBt() const1021 void putBt() const
1022 {
1023 static const char tbl[][16] = {
1024 "bt",
1025 "bts",
1026 "btr",
1027 "btc",
1028 };
1029 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1030 const char *p = tbl[i];
1031 put(p, MEM|REG16, REG16);
1032 put(p, MEM|REG32, REG32);
1033 put(p, MEM|REG64, REG64);
1034 put(p, MEM16|REG16, IMM);
1035 }
1036 }
putRorM() const1037 void putRorM() const
1038 {
1039 static const char tbl[][16] = {
1040 "inc",
1041 "dec",
1042 "div",
1043 "idiv",
1044 "imul",
1045 "mul",
1046 "neg",
1047 "not_",
1048 };
1049 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1050 const std::string s = removeUnderScore(tbl[i]);
1051 const char *p = s.c_str();
1052 put(p, REG32e|REG16|REG8|REG8_3);
1053 put(p, MEM32|MEM16|MEM8);
1054 }
1055 const char *p = "imul";
1056 put(p, REG16, REG16|MEM16);
1057 put(p, REG32, REG32|MEM32);
1058 put(p, REG64, REG64|MEM);
1059 put(p, REG16, REG16|MEM, IMM8|IMM16);
1060 put(p, REG32, REG32|MEM, IMM8|IMM32);
1061 put(p, REG64, REG64|MEM, IMM8|IMM32);
1062 }
putPushPop() const1063 void putPushPop() const
1064 {
1065 /*
1066 QQQ:
1067 push byte 2
1068 push dword 2
1069 reduce 4-byte stack
1070 push word 2
1071 reduce 2-byte stack, so I can't support it
1072 */
1073
1074 put("push", IMM8|IMM32);
1075 if (isXbyak_) {
1076 puts("push(word, 1000);dump();");
1077 } else {
1078 puts("push word 1000");
1079 }
1080
1081 put("push", REG16|MEM16);
1082 put("pop", REG16|MEM16);
1083 #ifdef XBYAK64
1084 put("push", REG64|IMM32|MEM64);
1085 put("pop", REG64|MEM64);
1086 #else
1087 put("push", REG32|IMM32|MEM32);
1088 put("pop", REG32|MEM32);
1089 #endif
1090 }
putTest() const1091 void putTest() const
1092 {
1093 const char *p = "test";
1094 put(p, REG32|MEM, REG32);
1095 put(p, REG64|MEM, REG64);
1096 put(p, REG16|MEM, REG16);
1097 put(p, REG8|REG8_3|MEM, REG8|REG8_3);
1098 put(p, REG32e|REG16|REG8|REG8_3|EAX|AX|AL|MEM32|MEM16|MEM8, IMM);
1099 }
putMov64() const1100 void putMov64() const
1101 {
1102 const struct {
1103 const char *a;
1104 const char *b;
1105 } tbl[] = {
1106 { "0", "0" },
1107 { "0x123", "0x123" },
1108 { "0x12345678", "0x12345678" },
1109 { "0x7fffffff", "0x7fffffff" },
1110 { "0xffffffff", "0xffffffff" },
1111 { "0x80000000", "0x80000000" },
1112 { "2147483648U", "2147483648" },
1113 { "0x80000001", "0x80000001" },
1114 { "0xffffffffffffffff", "0xffffffffffffffff" },
1115 { "-1", "-1" },
1116 { "0xffffffff80000000", "0xffffffff80000000" },
1117 { "0xffffffff80000001", "0xffffffff80000001" },
1118 { "0xffffffff12345678", "0xffffffff12345678" },
1119 };
1120 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1121 put("mov", REG64, tbl[i].a, tbl[i].b);
1122 }
1123 }
1124 // only nasm
putMovImm64() const1125 void putMovImm64() const
1126 {
1127 put("mov", REG64, "0x1234567890abcdefLL", "0x1234567890abcdef");
1128 put("mov", REG64, "0x12345678", "0x12345678");
1129 put("mov", REG64, "0xffffffff12345678LL", "0xffffffff12345678");
1130 put("mov", REG32e|REG16|REG8|RAX|EAX|AX|AL, IMM);
1131 }
putEtc() const1132 void putEtc() const
1133 {
1134 {
1135 const char *p = "ret";
1136 put(p);
1137 put(p, IMM);
1138 p = "mov";
1139 put(p, EAX|REG32|MEM|MEM_ONLY_DISP, REG32|EAX);
1140 put(p, REG64|MEM|MEM_ONLY_DISP, REG64|RAX);
1141 put(p, AX|REG16|MEM|MEM_ONLY_DISP, REG16|AX);
1142 put(p, AL|REG8|REG8_3|MEM|MEM_ONLY_DISP, REG8|REG8_3|AL);
1143 put(p, REG32e|REG16|REG8|RAX|EAX|AX|AL, MEM|MEM_ONLY_DISP);
1144 put(p, MEM32|MEM16|MEM8, IMM);
1145 put(p, REG64, "0x1234567890abcdefLL", "0x1234567890abcdef");
1146 put("movbe", REG16|REG32e, MEM);
1147 put("movbe", MEM, REG16|REG32e);
1148 #ifdef XBYAK64
1149 put(p, RAX|EAX|AX|AL, "ptr [0x1234567890abcdefLL]", "[qword 0x1234567890abcdef]");
1150 put(p, "ptr [0x1234567890abcdefLL]", "[qword 0x1234567890abcdef]", RAX|EAX|AX|AL);
1151 put(p, "qword [rax], 0");
1152 put(p, "qword [rax], 0x12");
1153 put(p, "qword [rax], 0x1234");
1154 put(p, "qword [rax], 0x12345678");
1155 // put(p, "qword [rax], 0x123456789ab");
1156 put(p, "qword [rax], 1000000");
1157 put(p, "rdx, qword [rax]");
1158 #endif
1159 put("mov", EAX, "ptr [eax + ecx * 0]", "[eax + ecx * 0]"); // ignore scale = 0
1160 }
1161 {
1162 const char tbl[][8] = {
1163 "movsx",
1164 "movzx",
1165 };
1166 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1167 const char *p = tbl[i];
1168 put(p, REG64, REG16|REG8|MEM8|MEM16);
1169 put(p, REG32, REG16|REG8|MEM8|MEM16);
1170 put(p, REG16, REG8|MEM8);
1171 }
1172 }
1173 #ifdef XBYAK64
1174 put("movsxd", REG64, REG32|MEM32);
1175 #endif
1176 put("cmpxchg8b", MEM);
1177 #ifdef XBYAK64
1178 put("cmpxchg16b", MEM);
1179 #endif
1180 {
1181 const char tbl[][8] = {
1182 "xadd",
1183 "cmpxchg"
1184 };
1185 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1186 const char *p = tbl[i];
1187 put(p, REG8|MEM, REG8);
1188 put(p, REG16|MEM, REG16);
1189 put(p, REG32|MEM, REG32);
1190 put(p, REG64|MEM, REG64);
1191 }
1192 }
1193
1194 put("xchg", AL|REG8, AL|REG8|MEM);
1195 put("xchg", MEM, AL|REG8);
1196 put("xchg", AX|REG16, AX|REG16|MEM);
1197 put("xchg", MEM, AX|REG16);
1198 put("xchg", EAX|REG32, EAX|REG32|MEM);
1199 put("xchg", MEM, EAX|REG32);
1200 put("xchg", REG64, REG64|MEM);
1201 }
putShift() const1202 void putShift() const
1203 {
1204 const char tbl[][8] = {
1205 "rcl",
1206 "rcr",
1207 "rol",
1208 "ror",
1209 "sar",
1210 "shl",
1211 "shr",
1212
1213 "sal",
1214 };
1215 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1216 const char *p = tbl[i];
1217 put(p, REG32e|REG16|REG8|MEM32|MEM16|MEM8, ONE|CL|IMM);
1218 }
1219 }
putShxd() const1220 void putShxd() const
1221 {
1222 const char tbl[][8] = {
1223 "shld",
1224 "shrd",
1225 };
1226 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1227 const char *p = tbl[i];
1228 put(p, REG64|MEM, REG64, IMM|CL);
1229 put(p, REG32|MEM, REG32, IMM|CL);
1230 put(p, REG16|MEM, REG16, IMM|CL);
1231 }
1232 }
putBs() const1233 void putBs() const
1234 {
1235 const char tbl[][8] = {
1236 "bsr",
1237 "bsf",
1238 "lzcnt",
1239 "tzcnt",
1240 "popcnt",
1241 };
1242 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1243 const char *p = tbl[i];
1244 put(p, REG64, REG64|MEM);
1245 put(p, REG32, REG32|MEM);
1246 put(p, REG16, REG16|MEM);
1247 }
1248 }
putSSSE3() const1249 void putSSSE3() const
1250 {
1251 const char tbl[][16] = {
1252 "pshufb",
1253 "phaddw",
1254 "phaddd",
1255 "phaddsw",
1256 "pmaddubsw",
1257 "phsubw",
1258 "phsubd",
1259 "phsubsw",
1260 "psignb",
1261 "psignw",
1262 "psignd",
1263 "pmulhrsw",
1264 "pabsb",
1265 "pabsw",
1266 "pabsd",
1267 };
1268 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1269 const char *p = tbl[i];
1270 put(p, XMM, XMM|MEM);
1271 put(p, MMX, MMX|MEM);
1272 }
1273 put("palignr", XMM, XMM|MEM, IMM8);
1274 put("palignr", MMX, MMX|MEM, IMM8);
1275 }
putSSE4_1() const1276 void putSSE4_1() const
1277 {
1278 const char tbl[][16] = {
1279 "blendvpd",
1280 "blendvps",
1281 "packusdw",
1282 "pblendvb",
1283 "pcmpeqq",
1284 "ptest",
1285 "pmovsxbw",
1286 "pmovsxbd",
1287 "pmovsxbq",
1288 "pmovsxwd",
1289 "pmovsxwq",
1290 "pmovsxdq",
1291 "pmovzxbw",
1292 "pmovzxbd",
1293 "pmovzxbq",
1294 "pmovzxwd",
1295 "pmovzxwq",
1296 "pmovzxdq",
1297 "pminsb",
1298 "pminsd",
1299 "pminuw",
1300 "pminud",
1301 "pmaxsb",
1302 "pmaxsd",
1303 "pmaxuw",
1304 "pmaxud",
1305 "pmuldq",
1306 "pmulld",
1307 "phminposuw",
1308 "pcmpgtq",
1309 "aesdec",
1310 "aesdeclast",
1311 "aesenc",
1312 "aesenclast",
1313 "aesimc",
1314 };
1315 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1316 const char *p = tbl[i];
1317 put(p, XMM, XMM|MEM);
1318 }
1319 }
putSSE4_2() const1320 void putSSE4_2() const
1321 {
1322 {
1323 const char tbl[][16] = {
1324 "blendpd",
1325 "blendps",
1326 "dppd",
1327 "dpps",
1328 "mpsadbw",
1329 "pblendw",
1330 "roundps",
1331 "roundpd",
1332 "roundss",
1333 "roundsd",
1334 "pcmpestrm",
1335 "pcmpestri",
1336 "pcmpistrm",
1337 "pcmpistri",
1338 "pclmulqdq",
1339 "aeskeygenassist",
1340 };
1341 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1342 const char *p = tbl[i];
1343 put(p, XMM, XMM|MEM, IMM);
1344 }
1345 }
1346 {
1347 const char tbl[][16] = {
1348 "pclmullqlqdq",
1349 "pclmulhqlqdq",
1350 // "pclmullqhdq", // QQQ : not supported by nasm/yasm
1351 // "pclmulhqhdq",
1352 };
1353 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1354 const char *p = tbl[i];
1355 put(p, XMM, XMM|MEM);
1356 }
1357 }
1358 put("extractps", REG32e|MEM, XMM, IMM);
1359 put("pextrw", REG32e|MEM, XMM, IMM); // pextrw for REG32 is for MMX2
1360 put("pextrb", REG32e|MEM, XMM, IMM);
1361 put("pextrd", REG32|MEM, XMM, IMM);
1362
1363 put("insertps", XMM, XMM|MEM, IMM);
1364 put("pinsrb", XMM, REG32|MEM, IMM);
1365 put("pinsrd", XMM, REG32|MEM, IMM);
1366 put("movntdqa", XMM, MEM);
1367 put("crc32", REG32, REG8|REG16|REG32|MEM8|MEM16|MEM32);
1368 put("crc32", REG64, REG64|REG8|MEM8);
1369 #ifdef XBYAK64
1370 put("pextrq", REG64|MEM, XMM, IMM);
1371 put("pinsrq", XMM, REG64|MEM, IMM);
1372 #endif
1373 }
putSHA() const1374 void putSHA() const
1375 {
1376 put("sha1rnds4", XMM, XMM|MEM, IMM);
1377 put("sha1nexte", XMM, XMM|MEM);
1378 put("sha1msg1", XMM, XMM|MEM);
1379 put("sha1msg2", XMM, XMM|MEM);
1380 put("sha256rnds2", XMM, XMM|MEM);
1381 put("sha256msg1", XMM, XMM|MEM);
1382 put("sha256msg2", XMM, XMM|MEM);
1383 }
putMPX() const1384 void putMPX() const
1385 {
1386 #ifdef XBYAK64
1387 const uint64 reg = REG64;
1388 #else
1389 const uint64 reg = REG32;
1390 #endif
1391 put("bndcl", BNDREG, reg|MEM);
1392 put("bndcu", BNDREG, reg|MEM);
1393 put("bndcn", BNDREG, reg|MEM);
1394 put("bndldx", BNDREG, MEM);
1395 put("bndmk", BNDREG, MEM);
1396 put("bndmov", BNDREG, BNDREG|MEM);
1397 put("bndstx", MEM, BNDREG);
1398 put("bndstx", "ptr [eax]", "[eax]", BNDREG);
1399 put("bndstx", "ptr [eax+5]", "[eax+5]", BNDREG);
1400 put("bndstx", "ptr [eax+500]", "[eax+500]", BNDREG);
1401 put("bndstx", "ptr [eax+ecx]", "[eax+ecx]", BNDREG);
1402 put("bndstx", "ptr [ecx+eax]", "[ecx+eax]", BNDREG);
1403 put("bndstx", "ptr [eax+esp]", "[eax+esp]", BNDREG);
1404 put("bndstx", "ptr [esp+eax]", "[esp+eax]", BNDREG);
1405 put("bndstx", "ptr [eax+ecx*2]", "[eax+ecx*2]", BNDREG);
1406 put("bndstx", "ptr [ecx+ecx]", "[ecx+ecx]", BNDREG);
1407 put("bndstx", "ptr [ecx*2]", "[ecx*2]", BNDREG);
1408 put("bndstx", "ptr [eax+ecx*2+500]", "[eax+ecx*2+500]", BNDREG);
1409 #ifdef XBYAK64
1410 put("bndstx", "ptr [rax+rcx*2]", "[rax+rcx*2]", BNDREG);
1411 put("bndstx", "ptr [r9*2]", "[r9*2]", BNDREG);
1412 put("bndstx", "ptr [r9*2+r15]", "[r9*2+r15]", BNDREG);
1413 #endif
1414 }
putFpuMem16_32() const1415 void putFpuMem16_32() const
1416 {
1417 const char tbl[][8] = {
1418 "fiadd",
1419 "fidiv",
1420 "fidivr",
1421 "ficom",
1422 "ficomp",
1423 "fimul",
1424 "fist",
1425 "fisub",
1426 "fisubr",
1427 };
1428 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1429 const char *p = tbl[i];
1430 put(p, MEM16|MEM32);
1431 }
1432 }
putFpuMem32_64() const1433 void putFpuMem32_64() const
1434 {
1435 const char tbl[][8] = {
1436 "fadd",
1437 "fcom",
1438 "fcomp",
1439 "fdiv",
1440 "fdivr",
1441 "fld",
1442 "fmul",
1443 "fst",
1444 "fstp",
1445 "fsub",
1446 "fsubr",
1447 };
1448 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1449 const char *p = tbl[i];
1450 put(p, MEM32|MEM64);
1451 }
1452 }
putFpuMem16_32_64() const1453 void putFpuMem16_32_64() const
1454 {
1455 const char tbl[][8] = {
1456 "fild",
1457 "fistp",
1458 "fisttp",
1459 };
1460 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1461 const char *p = tbl[i];
1462 put(p, MEM16|MEM32|MEM64);
1463 }
1464 }
putFpuFpu() const1465 void putFpuFpu() const
1466 {
1467 const struct Tbl {
1468 const char *name;
1469 int mode; /* 1:only (st0, sti), 2: only (sti, st0), 3: both */
1470 } tbl[] = {
1471 { "fadd", 3 },
1472 { "faddp", 2 },
1473 { "fcmovb", 1 },
1474 { "fcmove", 1 },
1475 { "fcmovbe", 1 },
1476 { "fcmovu", 1 },
1477 { "fcmovnb", 1 },
1478 { "fcmovne", 1 },
1479 { "fcmovnbe", 1 },
1480 { "fcmovnu", 1 },
1481 { "fcomi", 1 },
1482 { "fcomip", 1 },
1483 { "fucomi", 1 },
1484 { "fucomip", 1 },
1485 { "fdiv", 3 },
1486 { "fdivp", 2 },
1487 { "fdivr", 3 },
1488 { "fdivrp", 2 },
1489 { "fmul", 3 },
1490 { "fmulp", 2 },
1491 { "fsub", 3 },
1492 { "fsubp", 2 },
1493 { "fsubr", 3 },
1494 { "fsubrp", 2 },
1495 };
1496 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1497 const Tbl *p = &tbl[i];
1498 if (p->mode & 1) put(p->name, ST0, STi);
1499 if (p->mode & 2) put(p->name, STi, ST0);
1500 if (p->mode) put(p->name, STi);
1501 }
1502 }
putFpu() const1503 void putFpu() const
1504 {
1505 const char tbl[][16] = {
1506 "fcom",
1507 "fcomp",
1508 "ffree",
1509 "fld",
1510 "fst",
1511 "fstp",
1512 "fucom",
1513 "fucomp",
1514 "fxch",
1515 };
1516 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1517 put(tbl[i], STi);
1518 }
1519 }
putAVX1()1520 void putAVX1()
1521 {
1522 const struct Tbl {
1523 const char *name;
1524 bool only_pd_ps;
1525 } tbl[] = {
1526 { "add", false },
1527 { "sub", false },
1528 { "mul", false },
1529 { "div", false },
1530 { "max", false },
1531 { "min", false },
1532 { "and", true },
1533 { "andn", true },
1534 { "or", true },
1535 { "xor", true },
1536
1537 { "addsub", true },
1538 { "hadd", true },
1539 { "hsub", true },
1540 };
1541 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1542 const struct Suf {
1543 const char *suf;
1544 bool supportYMM;
1545 } suf[] = {
1546 { "pd", true },
1547 { "ps", true },
1548 { "sd", false },
1549 { "ss", false },
1550 };
1551 for (size_t j = 0; j < NUM_OF_ARRAY(suf); j++) {
1552 if (tbl[i].only_pd_ps && j == 2) break;
1553 std::string name = std::string("v") + tbl[i].name + suf[j].suf;
1554 const char *p = name.c_str();
1555 put(p, XMM, XMM | MEM);
1556 put(p, XMM, XMM, XMM | MEM);
1557 if (!suf[j].supportYMM) continue;
1558 put(p, YMM, YMM | MEM);
1559 put(p, YMM, YMM, YMM | MEM);
1560 }
1561 }
1562 }
putAVX_X_X_XM_omit()1563 void putAVX_X_X_XM_omit()
1564 {
1565 const struct Tbl {
1566 const char *name;
1567 bool supportYMM;
1568 } tbl[] = {
1569 { "vaesenc", false },
1570 { "vaesenclast", false },
1571 { "vaesdec", false },
1572 { "vaesdeclast", false },
1573 { "vcvtsd2ss", false },
1574 { "vcvtss2sd", false },
1575 { "vpacksswb", true },
1576 { "vpackssdw", true },
1577 { "vpackuswb", true },
1578 { "vpackusdw", true },
1579
1580 { "vpaddb", true },
1581 { "vpaddw", true },
1582 { "vpaddd", true },
1583 { "vpaddq", true },
1584
1585 { "vpaddsb", true },
1586 { "vpaddsw", true },
1587
1588 { "vpaddusb", true },
1589 { "vpaddusw", true },
1590
1591 { "vpand", true },
1592 { "vpandn", true },
1593 { "vpavgb", true },
1594 { "vpavgw", true },
1595
1596 { "vpcmpeqb", true },
1597 { "vpcmpeqw", true },
1598 { "vpcmpeqd", true },
1599 { "vpcmpeqq", true },
1600
1601 { "vpcmpgtb", true },
1602 { "vpcmpgtw", true },
1603 { "vpcmpgtd", true },
1604 { "vpcmpgtq", true },
1605
1606 { "vphaddw", true },
1607 { "vphaddd", true },
1608 { "vphaddsw", true },
1609
1610 { "vphsubw", true },
1611 { "vphsubd", true },
1612 { "vphsubsw", true },
1613 { "vpmaddwd", true },
1614 { "vpmaddubsw", true },
1615
1616 { "vpmaxsb", true },
1617 { "vpmaxsw", true },
1618 { "vpmaxsd", true },
1619
1620 { "vpmaxub", true },
1621 { "vpmaxuw", true },
1622 { "vpmaxud", true },
1623
1624 { "vpminsb", true },
1625 { "vpminsw", true },
1626 { "vpminsd", true },
1627
1628 { "vpminub", true },
1629 { "vpminuw", true },
1630 { "vpminud", true },
1631
1632 { "vpmulhuw", true },
1633 { "vpmulhrsw", true },
1634 { "vpmulhw", true },
1635 { "vpmullw", true },
1636 { "vpmulld", true },
1637
1638 { "vpmuludq", true },
1639 { "vpmuldq", true },
1640
1641 { "vpor", true },
1642 { "vpsadbw", true },
1643
1644 { "vpsignb", true },
1645 { "vpsignw", true },
1646 { "vpsignd", true },
1647
1648 { "vpsllw", false },
1649 { "vpslld", false },
1650 { "vpsllq", false },
1651
1652 { "vpsraw", false },
1653 { "vpsrad", false },
1654 { "vpsrlw", false },
1655 { "vpsrld", false },
1656 { "vpsrlq", false },
1657
1658 { "vpsubb", true },
1659 { "vpsubw", true },
1660 { "vpsubd", true },
1661 { "vpsubq", true },
1662
1663 { "vpsubsb", true },
1664 { "vpsubsw", true },
1665
1666 { "vpsubusb", true },
1667 { "vpsubusw", true },
1668
1669 { "vpunpckhbw", true },
1670 { "vpunpckhwd", true },
1671 { "vpunpckhdq", true },
1672 { "vpunpckhqdq", true },
1673
1674 { "vpunpcklbw", true },
1675 { "vpunpcklwd", true },
1676 { "vpunpckldq", true },
1677 { "vpunpcklqdq", true },
1678
1679 { "vpxor", true },
1680 { "vsqrtsd", false },
1681 { "vsqrtss", false },
1682
1683 { "vunpckhpd", true },
1684 { "vunpckhps", true },
1685 { "vunpcklpd", true },
1686 { "vunpcklps", true },
1687 };
1688 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1689 const Tbl *p = &tbl[i];
1690 put(p->name, XMM, XMM | MEM);
1691 put(p->name, XMM, XMM, XMM | MEM);
1692 if (!p->supportYMM) continue;
1693 put(p->name, YMM, YMM | MEM);
1694 put(p->name, YMM, YMM, YMM | MEM);
1695 }
1696 }
putAVX_X_X_XM_IMM()1697 void putAVX_X_X_XM_IMM()
1698 {
1699 const struct Tbl {
1700 const char *name;
1701 bool supportYMM;
1702 } tbl[] = {
1703 { "vblendpd", true },
1704 { "vblendps", true },
1705 { "vdppd", false },
1706 { "vdpps", true },
1707 { "vmpsadbw", true },
1708 { "vpblendw", true },
1709 { "vpblendd", true },
1710 { "vroundsd", false },
1711 { "vroundss", false },
1712 { "vpclmulqdq", false },
1713 { "vcmppd", true },
1714 { "vcmpps", true },
1715 { "vcmpsd", false },
1716 { "vcmpss", false },
1717 { "vinsertps", false },
1718 { "vpalignr", true },
1719 { "vshufpd", true },
1720 { "vshufps", true },
1721 };
1722 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1723 const Tbl *p = &tbl[i];
1724 put(p->name, XMM, XMM, XMM | MEM, IMM);
1725 put(p->name, XMM, XMM | MEM, IMM);
1726 if (!p->supportYMM) continue;
1727 put(p->name, YMM, YMM, YMM | MEM, IMM);
1728 put(p->name, YMM, YMM | MEM, IMM);
1729 }
1730 }
putAVX_X_XM_IMM()1731 void putAVX_X_XM_IMM()
1732 {
1733 const struct Tbl {
1734 const char *name;
1735 bool supportYMM;
1736 } tbl[] = {
1737 { "vroundpd", true },
1738 { "vroundps", true },
1739 { "vpcmpestri", false },
1740 { "vpcmpestrm", false },
1741 { "vpcmpistri", false },
1742 { "vpcmpistrm", false },
1743 { "vpermilpd", true },
1744 { "vpermilps", true },
1745 { "vaeskeygenassist", false },
1746 { "vpshufd", true },
1747 { "vpshufhw", true },
1748 { "vpshuflw", true },
1749 };
1750 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1751 const Tbl *p = &tbl[i];
1752 put(p->name, XMM, XMM | MEM, IMM);
1753 if (!p->supportYMM) continue;
1754 put(p->name, YMM, YMM | MEM, IMM);
1755 }
1756 }
putAVX_X_X_XM()1757 void putAVX_X_X_XM()
1758 {
1759 const struct Tbl {
1760 const char *name;
1761 bool supportYMM;
1762 } tbl[] = {
1763 { "vpermilpd", true },
1764 { "vpermilps", true },
1765 { "vpshufb", true },
1766
1767 { "vpsllvd", true },
1768 { "vpsllvq", true },
1769 { "vpsravd", true },
1770 { "vpsrlvd", true },
1771 { "vpsrlvq", true },
1772 };
1773 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1774 const Tbl *p = &tbl[i];
1775 put(p->name, XMM, XMM, XMM | MEM);
1776 if (!p->supportYMM) continue;
1777 put(p->name, YMM, YMM, YMM | MEM);
1778 }
1779 }
putAVX_X_XM()1780 void putAVX_X_XM()
1781 {
1782 const struct Tbl {
1783 const char *name;
1784 bool supportYMM;
1785 } tbl[] = {
1786 { "vaesimc", false },
1787 { "vtestps", true },
1788 { "vtestpd", true },
1789 { "vcomisd", false },
1790 { "vcomiss", false },
1791 { "vcvtdq2ps", true },
1792 { "vcvtps2dq", true },
1793 { "vcvttps2dq", true },
1794 { "vmovapd", true },
1795 { "vmovaps", true },
1796 { "vmovddup", true },
1797 { "vmovdqa", true },
1798 { "vmovdqu", true },
1799 { "vmovupd", true },
1800 { "vmovups", true },
1801
1802 { "vpabsb", true },
1803 { "vpabsw", true },
1804 { "vpabsd", true },
1805 { "vphminposuw", false },
1806
1807 { "vpmovsxbw", false },
1808 { "vpmovsxbd", false },
1809 { "vpmovsxbq", false },
1810 { "vpmovsxwd", false },
1811 { "vpmovsxwq", false },
1812 { "vpmovsxdq", false },
1813
1814 { "vpmovzxbw", false },
1815 { "vpmovzxbd", false },
1816 { "vpmovzxbq", false },
1817 { "vpmovzxwd", false },
1818 { "vpmovzxwq", false },
1819 { "vpmovzxdq", false },
1820
1821 { "vptest", true },
1822 { "vrcpps", true },
1823 { "vrcpss", false },
1824
1825 { "vrsqrtps", true },
1826 { "vrsqrtss", false },
1827
1828 { "vsqrtpd", true },
1829 { "vsqrtps", true },
1830 { "vucomisd", false },
1831 { "vucomiss", false },
1832 };
1833 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1834 const Tbl *p = &tbl[i];
1835 put(p->name, XMM, XMM | MEM);
1836 if (!p->supportYMM) continue;
1837 put(p->name, YMM, YMM | MEM);
1838 }
1839 }
putAVX_Y_XM()1840 void putAVX_Y_XM()
1841 {
1842 const char *tbl[] = {
1843 "vpmovsxbw",
1844 "vpmovsxbd",
1845 "vpmovsxbq",
1846 "vpmovsxwd",
1847 "vpmovsxwq",
1848 "vpmovsxdq",
1849 "vpmovzxbw",
1850 "vpmovzxbd",
1851 "vpmovzxbq",
1852 "vpmovzxwd",
1853 "vpmovzxwq",
1854 "vpmovzxdq",
1855 };
1856 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1857 const char *name = tbl[i];
1858 put(name, YMM, XMM);
1859 }
1860 }
putAVX_M_X()1861 void putAVX_M_X()
1862 {
1863 const struct Tbl {
1864 const char *name;
1865 bool supportYMM;
1866 } tbl[] = {
1867 { "vmovapd", true },
1868 { "vmovaps", true },
1869 { "vmovdqa", true },
1870 { "vmovdqu", true },
1871 { "vmovupd", true },
1872 { "vmovups", true },
1873 };
1874 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1875 const Tbl *p = &tbl[i];
1876 put(p->name, MEM, XMM);
1877 if (!p->supportYMM) continue;
1878 put(p->name, MEM, YMM);
1879 }
1880 }
putAVX_X_X_IMM_omit()1881 void putAVX_X_X_IMM_omit()
1882 {
1883 const struct Tbl {
1884 const char *name;
1885 bool support_Y_Y_X;
1886 } tbl[] = {
1887 { "vpslldq", false },
1888 { "vpsrldq", false },
1889 { "vpsllw", true },
1890 { "vpslld", true },
1891 { "vpsllq", true },
1892 { "vpsraw", true },
1893 { "vpsrad", true },
1894 { "vpsrlw", true },
1895 { "vpsrld", true },
1896 { "vpsrlq", true },
1897 };
1898 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1899 const Tbl& p = tbl[i];
1900 put(p.name, XMM, XMM, IMM);
1901 put(p.name, YMM, YMM, IMM);
1902 put(p.name, YMM, IMM);
1903 put(p.name, _ZMM, _ZMM, IMM8);
1904 #ifdef XBYAK64
1905 put(p.name, _XMM3, _XMM3, IMM8);
1906 put(p.name, _YMM3, _YMM3, IMM8);
1907 #endif
1908 if (p.support_Y_Y_X) {
1909 put(p.name, YMM, YMM, XMM);
1910 }
1911 }
1912 }
putFMA()1913 void putFMA()
1914 {
1915 const struct Tbl {
1916 const char *name;
1917 bool supportYMM;
1918 } tbl[] = {
1919 { "vfmadd", true },
1920 { "vfmadd", false },
1921 { "vfmaddsub", true },
1922 { "vfmsubadd", true },
1923 { "vfmsub", true },
1924 { "vfmsub", false },
1925 { "vfnmadd", true },
1926 { "vfnmadd", false },
1927 { "vfnmsub", true },
1928 { "vfnmsub", false },
1929 };
1930 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1931 const Tbl& p = tbl[i];
1932 const struct Ord {
1933 const char *name;
1934 } ord[] = {
1935 { "132" },
1936 { "213" },
1937 { "231" },
1938 };
1939 for (size_t j = 0; j < NUM_OF_ARRAY(ord); j++) {
1940 const char sufTbl[][2][8] = {
1941 { "pd", "ps" },
1942 { "sd", "ss" },
1943 };
1944 for (size_t k = 0; k < 2; k++) {
1945 const std::string suf = sufTbl[p.supportYMM ? 0 : 1][k];
1946 std::string name = std::string(p.name) + ord[j].name + suf;
1947 const char *q = name.c_str();
1948 put(q, XMM, XMM, XMM | MEM);
1949 if (!p.supportYMM) continue;
1950 put(q, YMM, YMM, YMM | MEM);
1951 }
1952 }
1953 }
1954 }
putAVX2()1955 void putAVX2()
1956 {
1957 put("vextractps", REG32 | MEM, XMM, IMM);
1958 put("vldmxcsr", MEM);
1959 put("vstmxcsr", MEM);
1960 put("vmaskmovdqu", XMM, XMM);
1961
1962 put("vmovd", XMM, REG32 | MEM);
1963 put("vmovd", REG32 | MEM, XMM);
1964
1965 put("vmovq", XMM, XMM | MEM);
1966 put("vmovq", MEM, XMM);
1967
1968 put("vmovhlps", XMM, XMM);
1969 put("vmovhlps", XMM, XMM, XMM);
1970 put("vmovlhps", XMM, XMM);
1971 put("vmovlhps", XMM, XMM, XMM);
1972
1973 {
1974 const char tbl[][16] = {
1975 "vmovhpd",
1976 "vmovhps",
1977 "vmovlpd",
1978 "vmovlps",
1979 };
1980 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1981 put(tbl[i], XMM, XMM, MEM);
1982 put(tbl[i], XMM, MEM);
1983 put(tbl[i], MEM, XMM);
1984 }
1985 }
1986 put("vmovmskpd", REG32e, XMM | YMM);
1987 put("vmovmskps", REG32e, XMM | YMM);
1988
1989 put("vmovntdq", MEM, XMM | YMM);
1990 put("vmovntpd", MEM, XMM | YMM);
1991 put("vmovntdqa", XMM | YMM, MEM);
1992
1993 {
1994 const char tbl[][8] = { "vmovsd", "vmovss" };
1995 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1996 put(tbl[i], XMM, XMM, XMM);
1997 put(tbl[i], XMM, XMM | MEM);
1998 put(tbl[i], MEM, XMM);
1999 }
2000 }
2001 put("vpextrb", REG32e|MEM, XMM, IMM);
2002 put("vpextrd", REG32|MEM, XMM, IMM);
2003
2004 for (int i = 0; i < 3; i++) {
2005 const char tbl[][8] = { "vpinsrb", "vpinsrw", "vpinsrd" };
2006 put(tbl[i], XMM, XMM, REG32|MEM, IMM);
2007 put(tbl[i], XMM, REG32|MEM, IMM);
2008 }
2009
2010 put("vpmovmskb", REG32e, XMM|YMM);
2011
2012 {
2013 const struct Tbl {
2014 const char *name;
2015 bool supportYMM;
2016 } tbl[] = {
2017 { "vblendvpd", true },
2018 { "vblendvps", true },
2019 { "vpblendvb", true },
2020 };
2021 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2022 const Tbl& p = tbl[i];
2023 put(p.name, XMM, XMM, XMM | MEM, XMM);
2024 put(p.name, XMM, XMM | MEM, XMM);
2025 if (!p.supportYMM) continue;
2026 put(p.name, YMM, YMM, YMM | MEM, YMM);
2027 put(p.name, YMM, YMM | MEM, YMM);
2028 }
2029 }
2030 // cvt
2031 {
2032 put("vcvtss2si", REG32e, XMM | MEM);
2033 put("vcvttss2si", REG32e, XMM | MEM);
2034 put("vcvtsd2si", REG32e, XMM | MEM);
2035 put("vcvttsd2si", REG32e, XMM | MEM);
2036
2037 put("vcvtsi2ss", XMM, XMM, REG32e | MEM);
2038 put("vcvtsi2ss", XMM, REG32e | MEM);
2039
2040 put("vcvtsi2sd", XMM, XMM, REG32e | MEM);
2041 put("vcvtsi2sd", XMM, REG32e | MEM);
2042 #ifdef XBYAK64
2043 put("vcvtsi2sd", XMM, XMM, MEM64);
2044 put("vcvtsi2sd", XMM, MEM64);
2045 #endif
2046
2047 put("vcvtps2pd", XMM | YMM, XMM | MEM);
2048 put("vcvtdq2pd", XMM | YMM, XMM | MEM);
2049
2050 put("vcvtpd2ps", XMM, XMM | YMM | MEM);
2051 put("vcvtpd2dq", XMM, XMM | YMM | MEM);
2052 put("vcvttpd2dq", XMM, XMM | YMM | MEM);
2053
2054 put("vcvtph2ps", XMM | YMM, XMM | MEM);
2055 put("vcvtps2ph", XMM | MEM, XMM | YMM, IMM8);
2056 }
2057 #ifdef XBYAK64
2058 put("vmovq", XMM, REG64);
2059 put("vmovq", REG64, XMM);
2060
2061 put("vpextrq", REG64|MEM, XMM, IMM);
2062
2063 put("vpinsrq", XMM, XMM, REG64|MEM, IMM);
2064 put("vpinsrq", XMM, REG64|MEM, IMM);
2065
2066 #endif
2067 }
putFMA2()2068 void putFMA2()
2069 {
2070 #ifdef USE_YASM
2071 put("vextractf128", XMM | MEM, YMM, IMM);
2072 put("vextracti128", XMM | MEM, YMM, IMM);
2073 put("vmaskmovps", MEM, YMM, YMM);
2074 put("vmaskmovpd", MEM, YMM, YMM);
2075 put("vlddqu", XMM | YMM, MEM);
2076
2077 put("vmovshdup", XMM, XMM | MEM);
2078 put("vmovshdup", YMM, YMM | MEM);
2079 put("vmovsldup", XMM, XMM | MEM);
2080 put("vmovsldup", YMM, YMM | MEM);
2081
2082 // QQQ:nasm is wrong
2083 put("vpcmpeqq", XMM, XMM | MEM);
2084 put("vpcmpeqq", XMM, XMM, XMM | MEM);
2085 put("vpcmpgtq", XMM, XMM | MEM);
2086 put("vpcmpgtq", XMM, XMM, XMM | MEM);
2087
2088 put("vmovntps", MEM, XMM | YMM); // nasm error
2089 #else
2090 put("vmaskmovps", XMM, XMM, MEM);
2091 put("vmaskmovps", YMM, YMM, MEM);
2092
2093 put("vmaskmovpd", YMM, YMM, MEM);
2094 put("vmaskmovpd", XMM, XMM, MEM);
2095
2096 put("vmaskmovps", MEM, XMM, XMM);
2097 put("vmaskmovpd", MEM, XMM, XMM);
2098 #endif
2099 }
putCmp()2100 void putCmp()
2101 {
2102 const char pred[32][16] = {
2103 "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord",
2104 "eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt",
2105 "true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s",
2106 "eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us"
2107 };
2108 const char suf[][4] = { "pd", "ps", "sd", "ss" };
2109 for (int i = 0; i < 4; i++) {
2110 for (int j = 0; j < 32; j++) {
2111 if (j < 8) {
2112 put((std::string("cmp") + pred[j] + suf[i]).c_str(), XMM, XMM | MEM);
2113 }
2114 std::string str = std::string("vcmp") + pred[j] + suf[i];
2115 const char *p = str.c_str();
2116 put(p, XMM, XMM | MEM);
2117 put(p, XMM, XMM, XMM | MEM);
2118 if (i >= 2) continue;
2119 put(p, YMM, YMM | MEM);
2120 put(p, YMM, YMM, YMM | MEM);
2121 }
2122 }
2123 }
putRip()2124 void putRip()
2125 {
2126 const char tbl[][2][64] = {
2127 { "mov(byte [rip - 10], 3);dump();", "mov byte [rip - 10], 3" },
2128 { "mov(word [rip - 10], 3);dump();", "mov word [rip - 10], 3" },
2129 { "mov(dword[rip - 10], 3);dump();", "mov dword [rip - 10], 3" },
2130 { "mov(qword [rip - 10], 3);dump();", "mov qword [rip - 10], 3" },
2131 { "mov(ptr [rip - 10], al);dump();", "mov byte [rip - 10], al" },
2132 { "mov(ptr [rip - 10], ax);dump();", "mov word [rip - 10], ax" },
2133 { "mov(ptr [rip - 10], eax);dump();", "mov dword [rip - 10], eax" },
2134 { "mov(ptr [rip - 10], rax);dump();", "mov qword [rip - 10], rax" },
2135 };
2136 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2137 puts(tbl[i][isXbyak_ ? 0 : 1]);
2138 }
2139 }
2140 public:
Test(bool isXbyak)2141 Test(bool isXbyak)
2142 : isXbyak_(isXbyak)
2143 , funcNum_(1)
2144 {
2145 if (!isXbyak_) return;
2146 printf("%s",
2147 " void gen0()\n"
2148 " {\n");
2149 }
2150 /*
2151 gcc and vc give up to compile this source,
2152 so I split functions.
2153 */
separateFunc()2154 void separateFunc()
2155 {
2156 if (!isXbyak_) return;
2157 printf(
2158 " }\n"
2159 " void gen%d()\n"
2160 " {\n", funcNum_++);
2161 }
/*
	In Xbyak mode, closes the last gen<N>() body and prints gen(), which
	calls gen0() .. gen<funcNum_-1>() in order. Does nothing otherwise.
*/
~Test()
{
	if (isXbyak_) {
		printf("%s",
			" }\n"
			" void gen()\n"
			" {\n");
		int id = 0;
		while (id < funcNum_) {
			printf(
				" gen%d();\n", id);
			++id;
		}
		printf(
			" }\n");
	}
}
putGprR_R_RM()2176 void putGprR_R_RM()
2177 {
2178 const char *tbl[] = {
2179 "andn",
2180 "mulx",
2181 "pdep",
2182 "pext",
2183 };
2184 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2185 const char *name = tbl[i];
2186 put(name, REG32, REG32, REG32 | MEM);
2187 #ifdef XBYAK64
2188 put(name, REG64, REG64, REG64 | MEM);
2189 #endif
2190 }
2191 }
putGprR_RM_R()2192 void putGprR_RM_R()
2193 {
2194 const char *tbl[] = {
2195 "bextr",
2196 "bzhi",
2197 "sarx",
2198 "shlx",
2199 "shrx",
2200 };
2201 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2202 const char *name = tbl[i];
2203 put(name, REG32, REG32 | MEM, REG32);
2204 #ifdef XBYAK64
2205 put(name, REG64, REG64 | MEM, REG64);
2206 #endif
2207 }
2208 }
putGprR_RM()2209 void putGprR_RM()
2210 {
2211 const char *tbl[] = {
2212 "blsi",
2213 "blsmsk",
2214 "blsr",
2215 };
2216 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2217 const char *name = tbl[i];
2218 put(name, REG32, REG32 | MEM);
2219 #ifdef XBYAK64
2220 put(name, REG64, REG64 | MEM);
2221 #endif
2222 }
2223 }
putGprOtherwise()2224 void putGprOtherwise()
2225 {
2226 put("rdrand", REG16 | REG32e);
2227 put("rdseed", REG16 | REG32e);
2228 put("rorx", REG32, REG32 | MEM, IMM8);
2229 #ifdef XBYAK64
2230 put("rorx", REG64, REG64 | MEM, IMM8);
2231 #endif
2232 }
putGather()2233 void putGather()
2234 {
2235 const int y_vx_y = 0;
2236 const int y_vy_y = 1;
2237 const int x_vy_x = 2;
2238 const struct Tbl {
2239 const char *name;
2240 int mode;
2241 } tbl[] = {
2242 { "vgatherdpd", y_vx_y },
2243 { "vgatherqpd", y_vy_y },
2244 { "vgatherdps", y_vy_y },
2245 { "vgatherqps", x_vy_x },
2246 { "vpgatherdd", y_vy_y },
2247 { "vpgatherqd", x_vy_x },
2248 { "vpgatherdq", y_vx_y },
2249 { "vpgatherqq", y_vy_y },
2250 };
2251 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2252 const Tbl& p = tbl[i];
2253 const char *name = p.name;
2254 put(name, XMM, VM32X, XMM);
2255 switch (p.mode) {
2256 case y_vx_y:
2257 put(name, YMM, VM32X, YMM);
2258 break;
2259 case y_vy_y:
2260 put(name, YMM, VM32Y, YMM);
2261 break;
2262 case x_vy_x:
2263 put(name, XMM, VM32Y, XMM);
2264 break;
2265 default:
2266 printf("ERR mode=%d\n", p.mode);
2267 exit(1);
2268 }
2269 }
2270 }
putGath(const std::string & vsib)2271 void putGath(const std::string& vsib)
2272 {
2273 std::string x = "xmm1, ";
2274 std::string a = std::string("[") + vsib + "], xmm3";
2275 put("vgatherdpd", (x + "ptr" + a).c_str(), (x + a).c_str());
2276 }
2277
putGatherAll()2278 void putGatherAll()
2279 {
2280 const char *xmmTbl[] = {
2281 "xmm2",
2282 "xmm4",
2283 "xmm2*1",
2284 "xmm2*4",
2285 };
2286 for (size_t i = 0; i < NUM_OF_ARRAY(xmmTbl); i++) {
2287 std::string s = xmmTbl[i];
2288 putGath(s);
2289 putGath(s + "+3");
2290 putGath(s + "+eax");
2291 putGath("3+" + s);
2292 putGath("eax+" + s);
2293 }
2294 for (size_t i = 0; i < NUM_OF_ARRAY(xmmTbl); i++) {
2295 int ord[] = { 0, 1, 2 };
2296 do {
2297 std::string s;
2298 for (int j = 0; j < 3; j++) {
2299 if (j > 0) s += '+';
2300 switch (ord[j]) {
2301 case 0: s += xmmTbl[i]; break;
2302 case 1: s += "123"; break;
2303 case 2: s += "ebp"; break;
2304 }
2305 }
2306 putGath(s);
2307 } while (std::next_permutation(ord, ord + 3));
2308 }
2309 }
putSeg()2310 void putSeg()
2311 {
2312 {
2313 const char *segTbl[] = {
2314 "es",
2315 "cs",
2316 "ss",
2317 "ds",
2318 "fs",
2319 "gs",
2320 };
2321 for (size_t i = 0; i < NUM_OF_ARRAY(segTbl); i++) {
2322 const char *seg = segTbl[i];
2323 const char *op1Tbl[] = {
2324 "ax",
2325 "edx",
2326 (isXbyak_ ? "ptr [eax]" : "[eax]"),
2327 #ifdef XBYAK64
2328 "r9",
2329 #endif
2330 };
2331 for (size_t j = 0; j < NUM_OF_ARRAY(op1Tbl); j++) {
2332 const char *op1 = op1Tbl[j];
2333 if (isXbyak_) {
2334 printf("mov(%s, %s); dump();\n", op1, seg);
2335 printf("mov(%s, %s); dump();\n", seg, op1);
2336 } else {
2337 printf("mov %s, %s\n", op1, seg);
2338 printf("mov %s, %s\n", seg, op1);
2339 }
2340 }
2341 }
2342 }
2343 {
2344 const char *segTbl[] = {
2345 #ifdef XBYAK32
2346 "es",
2347 "ss",
2348 "ds",
2349 #endif
2350 "fs",
2351 "gs",
2352 };
2353 for (size_t i = 0; i < NUM_OF_ARRAY(segTbl); i++) {
2354 const char *seg = segTbl[i];
2355 if (isXbyak_) {
2356 printf("push(%s); dump();\n", seg);
2357 printf("pop(%s); dump();\n", seg);
2358 } else {
2359 printf("push %s\n", seg);
2360 printf("pop %s\n", seg);
2361 }
2362 }
2363 }
2364 }
/*
	Top-level driver: runs the put* groups selected at compile time.
	- USE_AVX512 : only putAVX512()
	- USE_AVX    : the AVX / FMA / GPR groups (USE_YASM picks the subset)
	- otherwise  : the legacy groups (USE_YASM picks the subset), followed
	               by the 64-bit only groups when XBYAK64 is defined
	separateFunc() calls are interleaved to split the generated output
	into several functions.
*/
void put()
{
#ifdef USE_AVX512
	putAVX512();
#else

#ifdef USE_AVX

	separateFunc();
	putFMA2();

#ifdef USE_YASM
	putGprR_R_RM();
	putGprR_RM_R();
	putGprR_RM();
	putGprOtherwise();
	putGather();
	putGatherAll();
#else
	putAVX1();
	separateFunc();
	putAVX2();
	putAVX_X_X_XM_omit();
	separateFunc();
	putAVX_X_X_XM_IMM();
	separateFunc();
	putAVX_X_XM_IMM();
	separateFunc();
	putAVX_X_X_XM();
	separateFunc();
	putAVX_X_XM();
	separateFunc();
	putAVX_M_X();
	putAVX_X_X_IMM_omit();
	separateFunc();
	putAVX_Y_XM();
	separateFunc();
	putFMA();
	putSHA();
#endif

#else // USE_AVX

	putJmp();

#ifdef USE_YASM

	putSSSE3();
	putSSE4_1();
	separateFunc();
	putSSE4_2();
	putSeg(); // same behavior as yasm for mov rax, cx
#else
	putSIMPLE();
	putReg1();
	putBt();
	putRorM();
	separateFunc();
	putPushPop();
	putTest();
	separateFunc();
	putEtc();
	putShift();
	putShxd();

	separateFunc();

	putBs();
	putMMX1();
	putMMX2();
	separateFunc();
	putMMX3();
	putMMX4();
	putMMX5();
	separateFunc();
	putXMM1();
	putXMM2();
	putXMM3();
	putXMM4();
	separateFunc();
	putCmov();
	putFpuMem16_32();
	putFpuMem32_64();
	separateFunc();
	putFpuMem16_32_64();
	put("clflush", MEM); // current nasm is ok
	putFpu();
	putFpuFpu();
	putCmp();
	putMPX();
#endif

#ifdef XBYAK64

	// 64-bit only groups
#ifdef USE_YASM
	putRip();
#else
	putMov64();
	putMovImm64();
#endif

#endif // XBYAK64

#endif // USE_AVX

#endif // USE_AVX512
}
2472 #ifdef USE_AVX512
putOpmask()2473 void putOpmask()
2474 {
2475 {
2476 const char *tbl[] = {
2477 "kadd",
2478 "kand",
2479 "kandn",
2480 "kor",
2481 "kxnor",
2482 "kxor",
2483 };
2484 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2485 std::string name = tbl[i];
2486 put(name + "b", K, K, K);
2487 put(name + "w", K, K, K);
2488 put(name + "q", K, K, K);
2489 put(name + "d", K, K, K);
2490 }
2491 put("kunpckbw", K, K, K);
2492 put("kunpckwd", K, K, K);
2493 put("kunpckdq", K, K, K);
2494 }
2495 {
2496 const char *tbl[] = {
2497 "knot",
2498 "kortest",
2499 "ktest",
2500 };
2501 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2502 std::string name = tbl[i];
2503 put(name + "b", K, K);
2504 put(name + "w", K, K);
2505 put(name + "q", K, K);
2506 put(name + "d", K, K);
2507 }
2508 }
2509 {
2510 const char *tbl[] = {
2511 "kshiftl",
2512 "kshiftr",
2513 };
2514 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2515 std::string name = tbl[i];
2516 put(name + "b", K, K, IMM8);
2517 put(name + "w", K, K, IMM8);
2518 put(name + "q", K, K, IMM8);
2519 put(name + "d", K, K, IMM8);
2520 }
2521 }
2522 put("kmovw", K, K | MEM | REG32);
2523 put("kmovq", K, K | MEM);
2524 put("kmovb", K, K | MEM | REG32);
2525 put("kmovd", K, K | MEM | REG32);
2526
2527 put("kmovw", MEM | REG32, K);
2528 put("kmovq", MEM, K);
2529 put("kmovb", MEM | REG32, K);
2530 put("kmovd", MEM | REG32, K);
2531 #ifdef XBYAK64
2532 put("kmovq", K, REG64);
2533 put("kmovq", REG64, K);
2534 #endif
2535 }
put_vaddpd(const char * r1,const char * r2,const char * r3,int kIdx=0,bool z=false,int sae=0)2536 void put_vaddpd(const char *r1, const char *r2, const char *r3, int kIdx = 0, bool z = false, int sae = 0)
2537 {
2538 std::string modifier;
2539 char pk[16] = "";
2540 const char *pz = "";
2541 const char *saeTblXbyak[] = { "", "|T_rn_sae", "|T_rd_sae", "|T_ru_sae", "|T_rz_sae" };
2542 const char *saeTblNASM[] = { "", ",{rn-sae}", ",{rd-sae}", ",{ru-sae}", ",{rz-sae}" };
2543 if (isXbyak_) {
2544 if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "|k%d", kIdx);
2545 if (z) pz = "|T_z";
2546 printf("vaddpd(%s%s%s, %s, %s%s); dump();\n", r1, pk, pz, r2, r3, saeTblXbyak[sae]);
2547 } else {
2548 if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx);
2549 if (z) pz = "{z}";
2550 printf("vaddpd %s%s%s, %s, %s%s\n", r1, pk, pz, r2, r3, saeTblNASM[sae]);
2551 }
2552 }
putCombi()2553 void putCombi()
2554 {
2555 const char *xTbl[] = {
2556 "xmm2",
2557 #ifdef XBYAK64
2558 "xmm8", "xmm31"
2559 #else
2560 "xmm5", "xmm6"
2561 #endif
2562 };
2563 const char *yTbl[] = {
2564 "ymm0",
2565 #ifdef XBYAK64
2566 "ymm15", "ymm31"
2567 #else
2568 "ymm4", "ymm2"
2569 #endif
2570 };
2571 const char *zTbl[] = {
2572 "zmm1",
2573 #ifdef XBYAK64
2574 "zmm9", "zmm30"
2575 #else
2576 "zmm3", "zmm7"
2577 #endif
2578 };
2579 const size_t N = NUM_OF_ARRAY(zTbl);
2580 for (size_t i = 0; i < N; i++) {
2581 for (size_t j = 0; j < N; j++) {
2582 separateFunc();
2583 for (size_t k = 0; k < N; k++) {
2584 #ifdef XBYAK64
2585 for (int kIdx = 0; kIdx < 8; kIdx++) {
2586 for (int z = 0; z < 2; z++) {
2587 put_vaddpd(xTbl[i], xTbl[j], xTbl[k], kIdx, z == 1);
2588 put_vaddpd(yTbl[i], yTbl[j], yTbl[k], kIdx, z == 1);
2589 for (int sae = 0; sae < 5; sae++) {
2590 put_vaddpd(zTbl[i], zTbl[j], zTbl[k], kIdx, z == 1, sae);
2591 }
2592 }
2593 }
2594 #else
2595 put_vaddpd(xTbl[i], xTbl[j], xTbl[k]);
2596 put_vaddpd(yTbl[i], yTbl[j], yTbl[k]);
2597 for (int sae = 0; sae < 5; sae++) {
2598 put_vaddpd(zTbl[i], zTbl[j], zTbl[k], sae);
2599 }
2600 #endif
2601 }
2602 }
2603 }
2604 put("vaddpd", XMM, XMM, _MEM);
2605 put("vaddpd", YMM, YMM, _MEM);
2606 put("vaddpd", ZMM, ZMM, _MEM);
2607 }
putCmpK()2608 void putCmpK()
2609 {
2610 {
2611 const struct Tbl {
2612 const char *name;
2613 bool supportYMM;
2614 } tbl[] = {
2615 { "vcmppd", true },
2616 { "vcmpps", true },
2617 { "vcmpsd", false },
2618 { "vcmpss", false },
2619 };
2620 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2621 const Tbl *p = &tbl[i];
2622 put(p->name, K, _XMM, _XMM | MEM, IMM);
2623 if (!p->supportYMM) continue;
2624 put(p->name, K, _YMM, _YMM | MEM, IMM);
2625 put(p->name, K, _ZMM, _ZMM | MEM, IMM);
2626 }
2627 }
2628 put("vcmppd", K2, ZMM, ZMM_SAE, IMM);
2629 #ifdef XBYAK64
2630 {
2631 const struct Tbl {
2632 const char *name;
2633 } tbl[] = {
2634 { "vcomisd" },
2635 { "vcomiss" },
2636 { "vucomisd" },
2637 { "vucomiss" },
2638 };
2639 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2640 const Tbl *p = &tbl[i];
2641 put(p->name, XMM | _XMM3, XMM_SAE | XMM | MEM);
2642 }
2643 }
2644 put("vcomiss", _XMM3, XMM | MEM);
2645 put("vcomiss", XMM, XMM_SAE);
2646 #endif
2647 }
putBroadcastSub(int idx,int disp)2648 void putBroadcastSub(int idx, int disp)
2649 {
2650 #ifdef XBYAK64
2651 const char *a = "rax";
2652 #else
2653 const char *a = "eax";
2654 #endif
2655 if (isXbyak_) {
2656 printf("vaddpd(zmm%d, zmm1, ptr_b[%s+%d]);dump();\n", idx, a, disp);
2657 printf("vaddpd(ymm%d, ymm1, ptr_b[%s+%d]);dump();\n", idx, a, disp);
2658 printf("vaddpd(xmm%d, xmm1, ptr_b[%s+%d]);dump();\n", idx, a, disp);
2659 } else {
2660 printf("vaddpd zmm%d, zmm1, [%s+%d]{1to8}\n", idx, a, disp);
2661 printf("vaddpd ymm%d, ymm1, [%s+%d]{1to4}\n", idx, a, disp);
2662 printf("vaddpd xmm%d, xmm1, [%s+%d]{1to2}\n", idx, a, disp);
2663 }
2664 }
putBroadcast()2665 void putBroadcast()
2666 {
2667 for (int i = 0; i < 9; i++) {
2668 putBroadcastSub(0, i);
2669 #ifdef XBYAK64
2670 putBroadcastSub(10, i);
2671 putBroadcastSub(20, i);
2672 #endif
2673 }
2674 put("vpbroadcastb", XMM_KZ | ZMM_KZ, REG8);
2675 put("vpbroadcastw", XMM_KZ | ZMM_KZ, REG16);
2676 put("vpbroadcastd", XMM_KZ | ZMM_KZ, REG32);
2677 #ifdef XBYAK64
2678 put("vpbroadcastq", XMM_KZ | ZMM_KZ, REG64);
2679 #endif
2680 {
2681 const char *tbl[] = {
2682 "vpbroadcastb",
2683 "vpbroadcastw",
2684 "vpbroadcastd",
2685 "vpbroadcastq",
2686 };
2687 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2688 put(tbl[i], XMM_KZ | ZMM_KZ, _XMM | _MEM);
2689 }
2690 }
2691 put("vbroadcasti32x2", XMM_KZ | YMM_KZ | ZMM_KZ, _XMM | _MEM);
2692 put("vbroadcasti32x4", YMM_KZ | ZMM_KZ, _MEM);
2693 put("vbroadcasti64x2", YMM_KZ | ZMM_KZ, _MEM);
2694 put("vbroadcasti32x8", ZMM_KZ, _MEM);
2695 put("vbroadcasti64x4", ZMM_KZ, _MEM);
2696 }
putAVX512_M_X()2697 void putAVX512_M_X()
2698 {
2699 const char *tbl[] = {
2700 "vmovapd",
2701 "vmovaps",
2702 "vmovupd",
2703 "vmovups",
2704 };
2705 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2706 const char *name = tbl[i];
2707 put(name, MEM|MEM_K, ZMM|XMM|YMM);
2708 put(name, ZMM, MEM);
2709 }
2710 }
// Emit put() rows for the vmov* family plus valignd/valignq.
// The whole body is compiled only when XBYAK64 is defined; otherwise nothing is emitted.
void put_vmov()
{
#ifdef XBYAK64
	// vmovd / vmovq: load form first, then the mirrored store form
	const struct GprMove {
		const char *mnemonic;
		uint64_t other;
	} gprMoveTbl[] = {
		{ "vmovd", MEM|REG32 },
		{ "vmovq", MEM|REG64|XMM },
	};
	for (size_t n = 0; n < NUM_OF_ARRAY(gprMoveTbl); n++) {
		put(gprMoveTbl[n].mnemonic, _XMM3, gprMoveTbl[n].other);
		put(gprMoveTbl[n].mnemonic, gprMoveTbl[n].other, _XMM3);
	}

	put("vmovhlps", _XMM3, _XMM3, _XMM3);
	put("vmovlhps", _XMM3, _XMM3, _XMM3);

	const uint64_t anyVec = _XMM3 | _YMM3 | ZMM;
	put("vmovntdqa", anyVec, MEM);
	const char *const ntStoreTbl[] = { "vmovntdq", "vmovntpd", "vmovntps" };
	for (size_t n = 0; n < NUM_OF_ARRAY(ntStoreTbl); n++) {
		put(ntStoreTbl[n], MEM, anyVec);
	}

	const char *const scalarTbl[] = { "vmovsd", "vmovss" };
	for (size_t n = 0; n < NUM_OF_ARRAY(scalarTbl); n++) {
		put(scalarTbl[n], XMM_KZ, _XMM3, _XMM3);
		put(scalarTbl[n], XMM_KZ, MEM);
		put(scalarTbl[n], MEM_K, XMM);
	}

	put("vmovshdup", _ZMM, _ZMM);
	put("vmovsldup", _ZMM, _ZMM);

	const char *const alignTbl[] = { "valignd", "valignq" };
	for (size_t n = 0; n < NUM_OF_ARRAY(alignTbl); n++) {
		put(alignTbl[n], XMM_KZ, _XMM, _XMM | MEM, IMM);
		put(alignTbl[n], _YMM3, _YMM3, _YMM3, IMM);
		put(alignTbl[n], _ZMM, _ZMM, _ZMM, IMM);
	}

	const char *const halfTbl[] = { "vmovhpd", "vmovhps", "vmovlpd", "vmovlps" };
	for (size_t n = 0; n < NUM_OF_ARRAY(halfTbl); n++) {
		put(halfTbl[n], _XMM3, _XMM3, MEM);
		put(halfTbl[n], MEM, _XMM3);
	}
#endif
}
put512_X_XM()2762 void put512_X_XM()
2763 {
2764 const struct Tbl {
2765 const char *name;
2766 bool M_X;
2767 } tbl[] = {
2768 { "vmovddup", false },
2769 { "vmovdqa32", true },
2770 { "vmovdqa64", true },
2771 { "vmovdqu8", true },
2772 { "vmovdqu16", true },
2773 { "vmovdqu32", true },
2774 { "vmovdqu64", true },
2775 { "vpabsb", false },
2776 { "vpabsw", false },
2777 { "vpabsd", false },
2778 { "vpabsq", false },
2779 };
2780 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2781 const Tbl& p = tbl[i];
2782 put(p.name, _XMM|XMM_KZ, _XMM|MEM);
2783 put(p.name, _YMM|YMM_KZ, _YMM|MEM);
2784 put(p.name, _ZMM|ZMM_KZ, _ZMM|MEM);
2785 if (!p.M_X) continue;
2786 put(p.name, MEM, _XMM);
2787 put(p.name, MEM, _YMM);
2788 put(p.name, MEM, _ZMM);
2789 }
2790 put("vsqrtpd", XMM_KZ, M_1to2);
2791 put("vsqrtpd", YMM_KZ, M_1to4);
2792 put("vsqrtpd", ZMM_KZ, M_1to8);
2793 put("vsqrtpd", ZMM_KZ, ZMM_ER);
2794
2795 put("vsqrtps", XMM_KZ, M_1to4);
2796 put("vsqrtps", YMM_KZ, M_1to8);
2797 put("vsqrtps", ZMM_KZ, M_1to16);
2798 put("vsqrtps", ZMM_KZ, ZMM_ER);
2799
2800 put("vpabsd", ZMM_KZ, M_1to16);
2801 put("vpabsq", ZMM_KZ, M_1to8);
2802
2803 put("vbroadcastf32x2", YMM_KZ | ZMM_KZ, _XMM | _MEM);
2804 put("vbroadcastf32x4", YMM_KZ | ZMM_KZ, _MEM);
2805
2806 put("vbroadcastf64x2", YMM_KZ | ZMM_KZ, _MEM);
2807 put("vbroadcastf64x4", ZMM_KZ, _MEM);
2808 }
put512_X_X_XM()2809 void put512_X_X_XM()
2810 {
2811 const struct Tbl {
2812 const char *name;
2813 uint64_t mem;
2814 } tbl[] = {
2815 { "vsqrtsd", MEM },
2816 { "vsqrtss", MEM },
2817 { "vunpckhpd", M_1to2 },
2818 { "vunpckhps", M_1to4 },
2819 { "vunpcklpd", M_1to2 },
2820 { "vunpcklps", M_1to4 },
2821 };
2822 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2823 const Tbl& p = tbl[i];
2824 put(p.name, XMM_KZ, _XMM, _XMM|p.mem);
2825 }
2826 }
// Emit one put() call per table row.
// Each row is a mnemonic followed by three operand-type masks, which are forwarded
// unchanged as put(name, x1, x2, xm). Rows whose last column is IMM8 use that slot
// for the IMM8 mask instead of a third register/memory mask.
// Rows are emitted in table order. The whole body is compiled only when XBYAK64 is
// defined; otherwise the function emits nothing.
void put512_X3()
{
#ifdef XBYAK64
	const struct Tbl {
		const char *name; // instruction mnemonic
		uint64_t x1; // mask for the 1st operand
		uint64_t x2; // mask for the 2nd operand
		uint64_t xm; // mask for the 3rd operand
	} tbl[] = {
		// vpack*
		{ "vpacksswb", XMM_KZ, _XMM, _XMM | _MEM },
		{ "vpacksswb", YMM_KZ, _YMM, _YMM | _MEM },
		{ "vpacksswb", ZMM_KZ, _ZMM, _ZMM | _MEM },

		{ "vpackssdw", XMM_KZ, _XMM, _XMM | M_1to4 },
		{ "vpackssdw", YMM_KZ, _YMM, _YMM | M_1to8 },
		{ "vpackssdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 },

		{ "vpackusdw", XMM_KZ, _XMM, _XMM | M_1to4 },
		{ "vpackusdw", YMM_KZ, _YMM, _YMM | M_1to8 },
		{ "vpackusdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 },

		{ "vpackuswb", XMM_KZ, _XMM, _XMM | _MEM },
		{ "vpackuswb", YMM_KZ, _YMM, _YMM | _MEM },
		{ "vpackuswb", ZMM_KZ, _ZMM, _ZMM | _MEM },

		// vpadd*
		{ "vpaddb", XMM_KZ, _XMM, _XMM | _MEM },
		{ "vpaddw", XMM_KZ, _XMM, _XMM | _MEM },
		{ "vpaddd", XMM_KZ, _XMM, _XMM | M_1to4 },
		{ "vpaddq", ZMM_KZ, _ZMM, M_1to8 },

		{ "vpaddsb", XMM_KZ, _XMM, _XMM | _MEM },
		{ "vpaddsb", ZMM_KZ, _ZMM, _ZMM | _MEM },

		{ "vpaddsw", XMM_KZ, _XMM, _XMM | _MEM },
		{ "vpaddsw", ZMM_KZ, _ZMM, _ZMM | _MEM },

		{ "vpaddusb", XMM_KZ, _XMM, _XMM | MEM },
		{ "vpaddusb", ZMM_KZ, _ZMM, _ZMM | MEM },

		{ "vpaddusw", XMM_KZ, _XMM, _XMM | MEM },
		{ "vpaddusw", ZMM_KZ, _ZMM, _ZMM | MEM },

		// vpsub*
		{ "vpsubb", XMM_KZ, _XMM, _XMM | _MEM },
		{ "vpsubw", XMM_KZ, _XMM, _XMM | _MEM },
		{ "vpsubd", XMM_KZ, _XMM, _XMM | M_1to4 },
		{ "vpsubq", ZMM_KZ, _ZMM, M_1to8 },

		{ "vpsubsb", XMM_KZ, _XMM, _XMM | _MEM },
		{ "vpsubsb", ZMM_KZ, _ZMM, _ZMM | _MEM },

		{ "vpsubsw", XMM_KZ, _XMM, _XMM | _MEM },
		{ "vpsubsw", ZMM_KZ, _ZMM, _ZMM | _MEM },

		{ "vpsubusb", XMM_KZ, _XMM, _XMM | MEM },
		{ "vpsubusb", ZMM_KZ, _ZMM, _ZMM | MEM },

		{ "vpsubusw", XMM_KZ, _XMM, _XMM | MEM },
		{ "vpsubusw", ZMM_KZ, _ZMM, _ZMM | MEM },

		// vpand* / vpandn*
		{ "vpandd", ZMM_KZ, _ZMM, _ZMM | M_1to16 },
		{ "vpandq", ZMM_KZ, _ZMM, _ZMM | M_1to8 },

		{ "vpandnd", ZMM_KZ, _ZMM, _ZMM | M_1to16 },
		{ "vpandnq", ZMM_KZ, _ZMM, _ZMM | M_1to8 },

		// vpavg*
		{ "vpavgb", ZMM_KZ, _ZMM, _ZMM },
		{ "vpavgw", ZMM_KZ, _ZMM, _ZMM },

		// vpcmpeq* / vpcmpgt* : first operand mask is K2
		{ "vpcmpeqb", K2, _ZMM, _ZMM | _MEM },
		{ "vpcmpeqw", K2, _ZMM, _ZMM | _MEM },
		{ "vpcmpeqd", K2, _ZMM, _ZMM | M_1to16 },
		{ "vpcmpeqq", K2, _ZMM, _ZMM | M_1to8 },

		{ "vpcmpgtb", K2, _ZMM, _ZMM | _MEM },
		{ "vpcmpgtw", K2, _ZMM, _ZMM | _MEM },
		{ "vpcmpgtd", K2, _ZMM, _ZMM | M_1to16 },
		{ "vpcmpgtq", K2, _ZMM, _ZMM | M_1to8 },

		// vpmadd*
		{ "vpmaddubsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
		{ "vpmaddwd", ZMM_KZ, _ZMM, _ZMM | _MEM },

		// vpmax* / vpmin*
		{ "vpmaxsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
		{ "vpmaxsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
		{ "vpmaxsd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
		{ "vpmaxsq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },

		{ "vpmaxub", ZMM_KZ, _ZMM, _ZMM | _MEM },
		{ "vpmaxuw", ZMM_KZ, _ZMM, _ZMM | _MEM },
		{ "vpmaxud", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
		{ "vpmaxuq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },

		{ "vpminsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
		{ "vpminsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
		{ "vpminsd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
		{ "vpminsq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },

		{ "vpminub", ZMM_KZ, _ZMM, _ZMM | _MEM },
		{ "vpminuw", ZMM_KZ, _ZMM, _ZMM | _MEM },
		{ "vpminud", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
		{ "vpminuq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },

		// shifts with IMM8 in the last slot
		{ "vpslldq", _XMM3, _XMM3 | _MEM, IMM8 },
		{ "vpslldq", _YMM3, _YMM3 | _MEM, IMM8 },
		{ "vpslldq", _ZMM, _ZMM | _MEM, IMM8 },

		{ "vpsrldq", _XMM3, _XMM3 | _MEM, IMM8 },
		{ "vpsrldq", _YMM3, _YMM3 | _MEM, IMM8 },
		{ "vpsrldq", _ZMM, _ZMM | _MEM, IMM8 },

		{ "vpsraw", XMM_KZ, _XMM, IMM8 },
		{ "vpsraw", ZMM_KZ, _ZMM, IMM8 },

		{ "vpsrad", XMM_KZ, _XMM | M_1to4, IMM8 },
		{ "vpsrad", ZMM_KZ, _ZMM | M_1to16, IMM8 },

		{ "vpsraq", XMM, XMM, IMM8 },
		{ "vpsraq", XMM_KZ, _XMM | M_1to2, IMM8 },
		{ "vpsraq", ZMM_KZ, _ZMM | M_1to8, IMM8 },

		{ "vpsllw", _XMM3, _XMM3 | _MEM, IMM8 },
		{ "vpslld", _XMM3, _XMM3 | _MEM | M_1to4, IMM8 },
		{ "vpsllq", _XMM3, _XMM3 | _MEM | M_1to2, IMM8 },

		{ "vpsrlw", XMM_KZ, _XMM, IMM8 },
		{ "vpsrlw", ZMM_KZ, _ZMM, IMM8 },

		{ "vpsrld", XMM_KZ, _XMM | M_1to4, IMM8 },
		{ "vpsrld", ZMM_KZ, _ZMM | M_1to16, IMM8 },

		{ "vpsrlq", _XMM3, _XMM3 | _MEM | M_1to2, IMM8 },
		{ "vpsrlq", _ZMM, _ZMM | _MEM | M_1to8, IMM8 },

		// vpsrav* / vpsllv* / vpsrlv*
		{ "vpsravw", XMM_KZ | _XMM, _XMM, _XMM },
		{ "vpsravw", _ZMM, _ZMM, _MEM },

		{ "vpsravd", XMM_KZ | _XMM, _XMM, _XMM },
		{ "vpsravd", _ZMM, _ZMM, M_1to16 },

		{ "vpsravq", XMM_KZ | _XMM, _XMM, _XMM },
		{ "vpsravq", _ZMM, _ZMM, M_1to8 },

		{ "vpsllvw", XMM_KZ | _XMM, _XMM, _XMM },
		{ "vpsllvw", _ZMM, _ZMM, _MEM },

		{ "vpsllvd", XMM_KZ | _XMM, _XMM, _XMM },
		{ "vpsllvd", _ZMM, _ZMM, M_1to16 },

		{ "vpsllvq", XMM_KZ | _XMM, _XMM, _XMM },
		{ "vpsllvq", _ZMM, _ZMM, M_1to8 },

		{ "vpsrlvw", XMM_KZ | _XMM, _XMM, _XMM },
		{ "vpsrlvw", _ZMM, _ZMM, _MEM },

		{ "vpsrlvd", XMM_KZ | _XMM, _XMM, _XMM },
		{ "vpsrlvd", _ZMM, _ZMM, M_1to16 },

		{ "vpsrlvq", XMM_KZ | _XMM, _XMM, _XMM },
		{ "vpsrlvq", _ZMM, _ZMM, M_1to8 },

		// vpshuf*
		{ "vpshufb", _XMM | XMM_KZ, _XMM, _XMM },
		{ "vpshufb", ZMM_KZ, _ZMM, _MEM },

		{ "vpshufhw", _XMM | XMM_KZ, _XMM, IMM8 },
		{ "vpshufhw", ZMM_KZ, _MEM, IMM8 },

		{ "vpshuflw", _XMM | XMM_KZ, _XMM, IMM8 },
		{ "vpshuflw", ZMM_KZ, _MEM, IMM8 },

		{ "vpshufd", _XMM | XMM_KZ, _XMM | M_1to4, IMM8 },
		{ "vpshufd", _ZMM | ZMM_KZ, _ZMM | M_1to16, IMM8 },

		// vpor* / vpxor*
		{ "vpord", _XMM | XMM_KZ, _XMM, _XMM | M_1to4 },
		{ "vpord", _ZMM | ZMM_KZ, _ZMM, M_1to16 },

		{ "vporq", _XMM | XMM_KZ, _XMM, _XMM | M_1to2 },
		{ "vporq", _ZMM | ZMM_KZ, _ZMM, M_1to8 },

		{ "vpxord", _XMM | XMM_KZ, _XMM, _XMM | M_1to4 },
		{ "vpxord", _ZMM | ZMM_KZ, _ZMM, M_1to16 },

		{ "vpxorq", _XMM | XMM_KZ, _XMM, _XMM | M_1to2 },
		{ "vpxorq", _ZMM | ZMM_KZ, _ZMM, M_1to8 },

		// vpsadbw
		{ "vpsadbw", _XMM3, _XMM, _XMM },
		{ "vpsadbw", _ZMM, _ZMM, _MEM },

		// vpmul*
		{ "vpmuldq", _XMM3, _XMM, _XMM | M_1to2 },
		{ "vpmuldq", ZMM_KZ, _ZMM, M_1to8 },

		{ "vpmulhrsw", _XMM3, _XMM, _XMM },
		{ "vpmulhrsw", ZMM_KZ, _ZMM, _MEM },

		{ "vpmulhuw", _XMM3, _XMM, _XMM },
		{ "vpmulhuw", ZMM_KZ, _ZMM, _MEM },

		{ "vpmulhw", _XMM3, _XMM, _XMM },
		{ "vpmulhw", ZMM_KZ, _ZMM, _MEM },

		{ "vpmullw", _XMM3, _XMM, _XMM },
		{ "vpmullw", ZMM_KZ, _ZMM, _MEM },

		{ "vpmulld", _XMM3, _XMM, M_1to4 },
		{ "vpmulld", ZMM_KZ, _ZMM, M_1to16 },

		{ "vpmullq", _XMM3, _XMM, M_1to2 },
		{ "vpmullq", ZMM_KZ, _ZMM, M_1to8 },

		{ "vpmuludq", _XMM3, _XMM, M_1to2 },
		{ "vpmuludq", ZMM_KZ, _ZMM, M_1to8 },

		// vpunpckh* / vpunpckl*
		{ "vpunpckhbw", _XMM3, _XMM, _XMM },
		{ "vpunpckhbw", _ZMM, _ZMM, _MEM },

		{ "vpunpckhwd", _XMM3, _XMM, _XMM },
		{ "vpunpckhwd", _ZMM, _ZMM, _MEM },

		{ "vpunpckhdq", _XMM3, _XMM, M_1to4 },
		{ "vpunpckhdq", _ZMM, _ZMM, M_1to16 },

		{ "vpunpckhqdq", _XMM3, _XMM, M_1to2 },
		{ "vpunpckhqdq", _ZMM, _ZMM, M_1to8 },

		{ "vpunpcklbw", _XMM3, _XMM, _XMM },
		{ "vpunpcklbw", _ZMM, _ZMM, _MEM },

		{ "vpunpcklwd", _XMM3, _XMM, _XMM },
		{ "vpunpcklwd", _ZMM, _ZMM, _MEM },

		{ "vpunpckldq", _XMM3, _XMM, M_1to4 },
		{ "vpunpckldq", _ZMM, _ZMM, M_1to16 },

		{ "vpunpcklqdq", _XMM3, _XMM, M_1to2 },
		{ "vpunpcklqdq", _ZMM, _ZMM, M_1to8 },

		// vextract* : destination may be a register or memory, IMM8 in the last slot
		{ "vextractf32x4", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
		{ "vextractf64x2", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
		{ "vextractf32x8", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
		{ "vextractf64x4", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },

		{ "vextracti32x4", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
		{ "vextracti64x2", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
		{ "vextracti32x8", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
		{ "vextracti64x4", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },

		{ "vextractps", REG32 | _MEM, _XMM3, IMM8 },

		// vperm*
		{ "vpermb", XMM_KZ, _XMM, _XMM },
		{ "vpermb", ZMM_KZ, _ZMM, _ZMM | _MEM },

		{ "vpermw", XMM_KZ, _XMM, _XMM },
		{ "vpermw", ZMM_KZ, _ZMM, _ZMM | _MEM },

		{ "vpermd", YMM_KZ, _YMM, _YMM | M_1to8 },
		{ "vpermd", ZMM_KZ, _ZMM, _ZMM | M_1to16 },

		{ "vpermilpd", XMM_KZ, _XMM, _XMM | M_1to2 },
		{ "vpermilpd", ZMM_KZ, _ZMM, M_1to8 },
		{ "vpermilpd", XMM_KZ, M_1to2, IMM8 },
		{ "vpermilpd", ZMM_KZ, M_1to8, IMM8 },

		{ "vpermilps", XMM_KZ, _XMM, _XMM | M_1to4 },
		{ "vpermilps", ZMM_KZ, _ZMM, M_1to16 },
		{ "vpermilps", XMM_KZ, M_1to4, IMM8 },
		{ "vpermilps", ZMM_KZ, M_1to16, IMM8 },

		{ "vpermpd", YMM_KZ, _YMM | M_1to4, IMM8 },
		{ "vpermpd", ZMM_KZ, _ZMM | M_1to8, IMM8 },
		{ "vpermpd", YMM_KZ, _YMM, M_1to4 },
		{ "vpermpd", ZMM_KZ, _ZMM, M_1to8 },

		{ "vpermps", YMM_KZ, _YMM, M_1to8 },
		{ "vpermps", ZMM_KZ, _ZMM, M_1to16 },

		{ "vpermq", YMM_KZ, _YMM | M_1to4, IMM8 },
		{ "vpermq", ZMM_KZ, _ZMM | M_1to8, IMM8 },
		{ "vpermq", YMM_KZ, _YMM, M_1to4 },
		{ "vpermq", ZMM_KZ, _ZMM, M_1to8 },
	};
	// forward every row to put() in table order
	for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
		const Tbl& p = tbl[i];
		put(p.name, p.x1, p.x2, p.xm);
	}
#endif
}
put512_X3_I()3111 void put512_X3_I()
3112 {
3113 const struct Tbl {
3114 const char *name;
3115 uint64_t x1;
3116 uint64_t x2;
3117 uint64_t xm;
3118 } tbl[] = {
3119 #ifdef XBYAK64
3120 { "vinsertps", _XMM, _XMM, _XMM3 },
3121
3122 { "vshufpd", XMM_KZ, _XMM, M_1to2 },
3123 { "vshufpd", ZMM_KZ, _ZMM, M_1to8 },
3124
3125 { "vshufps", XMM_KZ, _XMM, M_1to4 },
3126 { "vshufps", ZMM_KZ, _ZMM, M_1to16 },
3127
3128 { "vinsertf32x4", _YMM | YMM_KZ, _YMM, _XMM | _MEM },
3129 { "vinsertf32x4", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM },
3130
3131 { "vinsertf64x2", _YMM | YMM_KZ, _YMM, _XMM | _MEM },
3132 { "vinsertf64x2", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM },
3133
3134 { "vinsertf32x8", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM },
3135 { "vinsertf64x4", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM },
3136
3137 { "vinserti32x4", _YMM | YMM_KZ, _YMM, _XMM | _MEM },
3138 { "vinserti32x4", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM },
3139
3140 { "vinserti64x2", _YMM | YMM_KZ, _YMM, _XMM | _MEM },
3141 { "vinserti64x2", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM },
3142
3143 { "vinserti32x8", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM },
3144 { "vinserti64x4", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM },
3145 #endif
3146 { "vpalignr", ZMM_KZ, _ZMM, _ZMM },
3147 };
3148 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
3149 const Tbl& p = tbl[i];
3150 put(p.name, p.x1, p.x2, p.xm, IMM8);
3151 }
3152 #ifdef XBYAK64
3153 put("vpextrb", _REG64, _XMM3, IMM8);
3154 put("vpextrw", _REG64|MEM, _XMM3, IMM8);
3155 put("vpextrd", _REG32, _XMM3, IMM8);
3156 put("vpextrq", _REG64, _XMM3, IMM8);
3157 put("vpinsrb", _XMM3, _XMM3, _REG32, IMM8);
3158 put("vpinsrw", _XMM3, _XMM3, _REG32, IMM8);
3159 put("vpinsrd", _XMM3, _XMM3, _REG32, IMM8);
3160 put("vpinsrq", _XMM3, _XMM3, _REG64, IMM8);
3161 #endif
3162 }
put512_FMA()3163 void put512_FMA()
3164 {
3165 const struct Tbl {
3166 const char *name;
3167 bool supportYMM;
3168 } tbl[] = {
3169 { "vfmadd", true },
3170 { "vfmadd", false },
3171 { "vfmaddsub", true },
3172 { "vfmsubadd", true },
3173 { "vfmsub", true },
3174 { "vfmsub", false },
3175 { "vfnmadd", true },
3176 { "vfnmadd", false },
3177 { "vfnmsub", true },
3178 { "vfnmsub", false },
3179 };
3180 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
3181 const Tbl& p = tbl[i];
3182 const struct Ord {
3183 const char *name;
3184 } ord[] = {
3185 { "132" },
3186 { "213" },
3187 { "231" },
3188 };
3189 for (size_t j = 0; j < NUM_OF_ARRAY(ord); j++) {
3190 const char sufTbl[][2][8] = {
3191 { "pd", "ps" },
3192 { "sd", "ss" },
3193 };
3194 for (size_t k = 0; k < 2; k++) {
3195 const std::string suf = sufTbl[p.supportYMM ? 0 : 1][k];
3196 uint64_t mem = 0;
3197 if (suf == "pd") {
3198 mem = M_1to2;
3199 } else if (suf == "ps") {
3200 mem = M_1to4;
3201 } else {
3202 mem = XMM_ER;
3203 }
3204 std::string name = std::string(p.name) + ord[j].name + suf;
3205 const char *q = name.c_str();
3206 put(q, XMM_KZ, _XMM, mem);
3207 if (!p.supportYMM) continue;
3208 if (suf == "pd") {
3209 mem = M_1to8;
3210 } else if (suf == "ps") {
3211 mem = M_1to16;
3212 } else {
3213 mem = XMM_ER;
3214 }
3215 put(q, _ZMM, _ZMM, mem);
3216 }
3217 }
3218 }
3219 }
put512_Y_XM()3220 void put512_Y_XM()
3221 {
3222 const char *tbl[] = {
3223 "vpmovsxbw",
3224 "vpmovsxbd",
3225 "vpmovsxbq",
3226 "vpmovsxwd",
3227 "vpmovsxwq",
3228 "vpmovsxdq",
3229 "vpmovzxbw",
3230 "vpmovzxbd",
3231 "vpmovzxbq",
3232 "vpmovzxwd",
3233 "vpmovzxwq",
3234 "vpmovzxdq",
3235 };
3236 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
3237 const char *name = tbl[i];
3238 put(name, XMM_KZ, _XMM);
3239 put(name, _ZMM, _MEM);
3240 }
3241 }
// Emit rows for the arithmetic/logic stems combined with the pd/ps/sd/ss suffixes.
// Stems flagged packedOnly are combined with pd and ps only.
// pd/ps get an xmm row and a zmm row, each with a broadcast third operand;
// sd/ss get the xmm row only.
// The whole body is compiled only when XBYAK64 is defined.
void put512_AVX1()
{
#ifdef XBYAK64
	const struct Op {
		const char *stem;
		bool packedOnly;
	} opTbl[] = {
		{ "vadd", false },
		{ "vsub", false },
		{ "vmul", false },
		{ "vdiv", false },
		{ "vmax", false },
		{ "vmin", false },
		{ "vand", true },
		{ "vandn", true },
		{ "vor", true },
		{ "vxor", true },
	};
	// suffix, third-operand mask of the xmm row, third-operand mask of the zmm row
	// NOTE(review): sd/ss pass a zero mask as the third operand, exactly as before;
	// presumably put() emits nothing for an empty mask - confirm against put().
	const struct Kind {
		const char *suffix;
		uint64_t xmmSrc;
		uint64_t zmmSrc;
		bool hasZmmRow;
	} kindTbl[] = {
		{ "pd", M_1to2, M_1to8, true },
		{ "ps", M_1to4, M_1to16, true },
		{ "sd", 0, 0, false },
		{ "ss", 0, 0, false },
	};
	const size_t packedKinds = 2; // pd and ps are the first two entries
	for (size_t i = 0; i < NUM_OF_ARRAY(opTbl); i++) {
		const Op& op = opTbl[i];
		const size_t kindNum = op.packedOnly ? packedKinds : NUM_OF_ARRAY(kindTbl);
		for (size_t j = 0; j < kindNum; j++) {
			const Kind& kind = kindTbl[j];
			const std::string mnemonic = std::string(op.stem) + kind.suffix;
			put(mnemonic.c_str(), _XMM3 | XMM_KZ, _XMM, kind.xmmSrc);
			if (kind.hasZmmRow) {
				put(mnemonic.c_str(), _ZMM, _ZMM, kind.zmmSrc);
			}
		}
	}
#endif
}
// Emit the vcvtdq2pd / vcvtdq2ps / vcvtpd2dq rows (destination mask, source mask).
// The whole body is compiled only when XBYAK64 is defined.
void put512_cvt()
{
#ifdef XBYAK64
	const struct Cvt {
		const char *mnemonic;
		uint64_t dst;
		uint64_t src;
	} cvtTbl[] = {
		{ "vcvtdq2pd", XMM_KZ, _XMM | M_1to2 },
		{ "vcvtdq2pd", YMM_KZ, _XMM | M_1to4 },
		{ "vcvtdq2pd", ZMM_KZ, _YMM | M_1to8 },

		{ "vcvtdq2ps", XMM_KZ, _XMM | M_1to4 },
		{ "vcvtdq2ps", YMM_KZ, _YMM | M_1to8 },
		{ "vcvtdq2ps", ZMM_KZ, _ZMM | M_1to16 },

		{ "vcvtpd2dq", XMM_KZ, _XMM | _YMM | M_1to2 },
		{ "vcvtpd2dq", YMM_KZ, _ZMM | ZMM_ER | M_1to8 },
	};
	const Cvt *const last = cvtTbl + sizeof(cvtTbl) / sizeof(cvtTbl[0]);
	for (const Cvt *c = cvtTbl; c != last; ++c) {
		put(c->mnemonic, c->dst, c->src);
	}
#endif
}
// Reduced test set selected by putAVX512() when MIN_TEST is defined:
// two vcvtpd2dq rows sharing the same destination mask.
// Emits nothing unless XBYAK64 is defined.
void putMin()
{
#ifdef XBYAK64
	const uint64_t xmmDst = _XMM | _XMM3;
	put("vcvtpd2dq", xmmDst, _XMM | M_xword | M_1to2);
	put("vcvtpd2dq", xmmDst, _YMM | M_yword | MY_1to4);
#endif
}
putAVX512()3316 void putAVX512()
3317 {
3318 #ifdef MIN_TEST
3319 putMin();
3320 #else
3321 putOpmask();
3322 separateFunc();
3323 putCombi();
3324 separateFunc();
3325 putCmpK();
3326 separateFunc();
3327 putBroadcast();
3328 separateFunc();
3329 putAVX512_M_X();
3330 separateFunc();
3331 put_vmov();
3332 separateFunc();
3333 put512_X_XM();
3334 separateFunc();
3335 put512_X_X_XM();
3336 separateFunc();
3337 put512_X3();
3338 separateFunc();
3339 put512_X3_I();
3340 separateFunc();
3341 put512_FMA();
3342 separateFunc();
3343 put512_Y_XM();
3344 separateFunc();
3345 put512_AVX1();
3346 separateFunc();
3347 put512_cvt();
3348 #endif
3349 }
3350 #endif
3351 };
3352
main(int argc,char * [])3353 int main(int argc, char *[])
3354 {
3355 Test test(argc > 1);
3356 test.put();
3357 }
3358