/* xref: /qemu/tests/tcg/i386/test-avx.c (revision 0339ddfa) */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Entry point of one generated test.  The TEST() macro below defines these
 * as naked functions that take no arguments and return nothing.
 */
typedef void (*testfn)(void);

/*
 * One 256-bit YMM value stored as four 64-bit quadwords.  q0 sits at the
 * lowest address; dump_ymm() prints q3 first.  The 32-byte alignment is
 * needed because the harness moves these with vmovdqa (see LOADYMM and
 * STOREYMM), which requires an aligned memory operand.
 */
typedef struct {
    uint64_t q0, q1, q2, q3;
} __attribute__((aligned(32))) v4di;

1291117bc5SPaul Brook typedef struct {
1391117bc5SPaul Brook     uint64_t mm[8];
140339ddfaSPaolo Bonzini     v4di ymm[16];
1591117bc5SPaul Brook     uint64_t r[16];
1691117bc5SPaul Brook     uint64_t flags;
1791117bc5SPaul Brook     uint32_t ff;
1891117bc5SPaul Brook     uint64_t pad;
190339ddfaSPaolo Bonzini     v4di mem[4];
200339ddfaSPaolo Bonzini     v4di mem0[4];
2191117bc5SPaul Brook } reg_state;
2291117bc5SPaul Brook 
2391117bc5SPaul Brook typedef struct {
2491117bc5SPaul Brook     int n;
2591117bc5SPaul Brook     testfn fn;
2691117bc5SPaul Brook     const char *s;
2791117bc5SPaul Brook     reg_state *init;
2891117bc5SPaul Brook } TestDef;
2991117bc5SPaul Brook 
3091117bc5SPaul Brook reg_state initI;
3191117bc5SPaul Brook reg_state initF32;
3291117bc5SPaul Brook reg_state initF64;
3391117bc5SPaul Brook 
340339ddfaSPaolo Bonzini static void dump_ymm(const char *name, int n, const v4di *r, int ff)
3591117bc5SPaul Brook {
360339ddfaSPaolo Bonzini     printf("%s%d = %016lx %016lx %016lx %016lx\n",
370339ddfaSPaolo Bonzini            name, n, r->q3, r->q2, r->q1, r->q0);
3891117bc5SPaul Brook     if (ff == 64) {
390339ddfaSPaolo Bonzini         double v[4];
4091117bc5SPaul Brook         memcpy(v, r, sizeof(v));
410339ddfaSPaolo Bonzini         printf("        %16g %16g %16g %16g\n",
4291117bc5SPaul Brook                 v[3], v[2], v[1], v[0]);
430339ddfaSPaolo Bonzini     } else if (ff == 32) {
440339ddfaSPaolo Bonzini         float v[8];
450339ddfaSPaolo Bonzini         memcpy(v, r, sizeof(v));
460339ddfaSPaolo Bonzini         printf(" %8g %8g %8g %8g %8g %8g %8g %8g\n",
470339ddfaSPaolo Bonzini                 v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]);
4891117bc5SPaul Brook     }
4991117bc5SPaul Brook }
5091117bc5SPaul Brook 
5191117bc5SPaul Brook static void dump_regs(reg_state *s)
5291117bc5SPaul Brook {
5391117bc5SPaul Brook     int i;
5491117bc5SPaul Brook 
5591117bc5SPaul Brook     for (i = 0; i < 16; i++) {
560339ddfaSPaolo Bonzini         dump_ymm("ymm", i, &s->ymm[i], 0);
5791117bc5SPaul Brook     }
5891117bc5SPaul Brook     for (i = 0; i < 4; i++) {
590339ddfaSPaolo Bonzini         dump_ymm("mem", i, &s->mem0[i], 0);
6091117bc5SPaul Brook     }
6191117bc5SPaul Brook }
6291117bc5SPaul Brook 
6391117bc5SPaul Brook static void compare_state(const reg_state *a, const reg_state *b)
6491117bc5SPaul Brook {
6591117bc5SPaul Brook     int i;
6691117bc5SPaul Brook     for (i = 0; i < 8; i++) {
6791117bc5SPaul Brook         if (a->mm[i] != b->mm[i]) {
6891117bc5SPaul Brook             printf("MM%d = %016lx\n", i, b->mm[i]);
6991117bc5SPaul Brook         }
7091117bc5SPaul Brook     }
7191117bc5SPaul Brook     for (i = 0; i < 16; i++) {
7291117bc5SPaul Brook         if (a->r[i] != b->r[i]) {
7391117bc5SPaul Brook             printf("r%d = %016lx\n", i, b->r[i]);
7491117bc5SPaul Brook         }
7591117bc5SPaul Brook     }
7691117bc5SPaul Brook     for (i = 0; i < 16; i++) {
770339ddfaSPaolo Bonzini         if (memcmp(&a->ymm[i], &b->ymm[i], 32)) {
780339ddfaSPaolo Bonzini             dump_ymm("ymm", i, &b->ymm[i], a->ff);
7991117bc5SPaul Brook         }
8091117bc5SPaul Brook     }
8191117bc5SPaul Brook     for (i = 0; i < 4; i++) {
820339ddfaSPaolo Bonzini         if (memcmp(&a->mem0[i], &a->mem[i], 32)) {
830339ddfaSPaolo Bonzini             dump_ymm("mem", i, &a->mem[i], a->ff);
8491117bc5SPaul Brook         }
8591117bc5SPaul Brook     }
8691117bc5SPaul Brook     if (a->flags != b->flags) {
8791117bc5SPaul Brook         printf("FLAGS = %016lx\n", b->flags);
8891117bc5SPaul Brook     }
8991117bc5SPaul Brook }
9091117bc5SPaul Brook 
/*
 * Assembly snippets (destination operand first) used with MMREG/YMMREG in
 * run_test().  LOAD* read register r from offset o of asm operand %0 (the
 * initial state); STORE* write register r to offset o of asm operand %1
 * (the result state).
 */
#define LOADMM(r, o) "movq " #r ", " #o "[%0]\n\t"
#define LOADYMM(r, o) "vmovdqa " #r ", " #o "[%0]\n\t"
#define STOREMM(r, o) "movq " #o "[%1], " #r "\n\t"
#define STOREYMM(r, o) "vmovdqa " #o "[%1], " #r "\n\t"
/*
 * X-macro: applies F(register, offset) to every MMX register.  The offsets
 * are the byte positions of reg_state.mm[0..7].
 */
#define MMREG(F) \
    F(mm0, 0x00) \
    F(mm1, 0x08) \
    F(mm2, 0x10) \
    F(mm3, 0x18) \
    F(mm4, 0x20) \
    F(mm5, 0x28) \
    F(mm6, 0x30) \
    F(mm7, 0x38)
/*
 * X-macro: applies F(register, offset) to every YMM register.  The offsets
 * are the byte positions of reg_state.ymm[0..15] (0x40 bytes of mm[] come
 * first, then 32 bytes per register).
 */
#define YMMREG(F) \
    F(ymm0, 0x040) \
    F(ymm1, 0x060) \
    F(ymm2, 0x080) \
    F(ymm3, 0x0a0) \
    F(ymm4, 0x0c0) \
    F(ymm5, 0x0e0) \
    F(ymm6, 0x100) \
    F(ymm7, 0x120) \
    F(ymm8, 0x140) \
    F(ymm9, 0x160) \
    F(ymm10, 0x180) \
    F(ymm11, 0x1a0) \
    F(ymm12, 0x1c0) \
    F(ymm13, 0x1e0) \
    F(ymm14, 0x200) \
    F(ymm15, 0x220)
/*
 * Assembly snippets used with REG in run_test(): load/store general
 * register r from/to offset o of the reg_state that rax points to.
 */
#define LOADREG(r, o) "mov " #r ", " #o "[rax]\n\t"
#define STOREREG(r, o) "mov " #o "[rax], " #r "\n\t"
/*
 * X-macro: applies F(register, offset) to the general registers that are
 * transferred in bulk.  Offsets index reg_state.r[], which starts at 0x240.
 * rax (r[0], 0x240) is handled separately in run_test() because it serves
 * as the base pointer; slots 0x270 and 0x278 (r[6], r[7]) are skipped here.
 *
 * The last entry deliberately has no trailing backslash, so the macro
 * cannot swallow whatever line happens to follow it.
 */
#define REG(F) \
    F(rbx, 0x248) \
    F(rcx, 0x250) \
    F(rdx, 0x258) \
    F(rsi, 0x260) \
    F(rdi, 0x268) \
    F(r8, 0x280) \
    F(r9, 0x288) \
    F(r10, 0x290) \
    F(r11, 0x298) \
    F(r12, 0x2a0) \
    F(r13, 0x2a8) \
    F(r14, 0x2b0) \
    F(r15, 0x2b8)

13891117bc5SPaul Brook static void run_test(const TestDef *t)
13991117bc5SPaul Brook {
14091117bc5SPaul Brook     reg_state result;
14191117bc5SPaul Brook     reg_state *init = t->init;
14291117bc5SPaul Brook     memcpy(init->mem, init->mem0, sizeof(init->mem));
14391117bc5SPaul Brook     printf("%5d %s\n", t->n, t->s);
14491117bc5SPaul Brook     asm volatile(
14591117bc5SPaul Brook             MMREG(LOADMM)
1460339ddfaSPaolo Bonzini             YMMREG(LOADYMM)
14791117bc5SPaul Brook             "sub rsp, 128\n\t"
14891117bc5SPaul Brook             "push rax\n\t"
14991117bc5SPaul Brook             "push rbx\n\t"
15091117bc5SPaul Brook             "push rcx\n\t"
15191117bc5SPaul Brook             "push rdx\n\t"
15291117bc5SPaul Brook             "push %1\n\t"
15391117bc5SPaul Brook             "push %2\n\t"
15491117bc5SPaul Brook             "mov rax, %0\n\t"
15591117bc5SPaul Brook             "pushf\n\t"
15691117bc5SPaul Brook             "pop rbx\n\t"
15791117bc5SPaul Brook             "shr rbx, 8\n\t"
15891117bc5SPaul Brook             "shl rbx, 8\n\t"
1590339ddfaSPaolo Bonzini             "mov rcx, 0x2c0[rax]\n\t"
16091117bc5SPaul Brook             "and rcx, 0xff\n\t"
16191117bc5SPaul Brook             "or rbx, rcx\n\t"
16291117bc5SPaul Brook             "push rbx\n\t"
16391117bc5SPaul Brook             "popf\n\t"
16491117bc5SPaul Brook             REG(LOADREG)
1650339ddfaSPaolo Bonzini             "mov rax, 0x240[rax]\n\t"
16691117bc5SPaul Brook             "call [rsp]\n\t"
16791117bc5SPaul Brook             "mov [rsp], rax\n\t"
16891117bc5SPaul Brook             "mov rax, 8[rsp]\n\t"
16991117bc5SPaul Brook             REG(STOREREG)
17091117bc5SPaul Brook             "mov rbx, [rsp]\n\t"
1710339ddfaSPaolo Bonzini             "mov 0x240[rax], rbx\n\t"
17291117bc5SPaul Brook             "mov rbx, 0\n\t"
1730339ddfaSPaolo Bonzini             "mov 0x270[rax], rbx\n\t"
1740339ddfaSPaolo Bonzini             "mov 0x278[rax], rbx\n\t"
17591117bc5SPaul Brook             "pushf\n\t"
17691117bc5SPaul Brook             "pop rbx\n\t"
17791117bc5SPaul Brook             "and rbx, 0xff\n\t"
1780339ddfaSPaolo Bonzini             "mov 0x2c0[rax], rbx\n\t"
17991117bc5SPaul Brook             "add rsp, 16\n\t"
18091117bc5SPaul Brook             "pop rdx\n\t"
18191117bc5SPaul Brook             "pop rcx\n\t"
18291117bc5SPaul Brook             "pop rbx\n\t"
18391117bc5SPaul Brook             "pop rax\n\t"
18491117bc5SPaul Brook             "add rsp, 128\n\t"
18591117bc5SPaul Brook             MMREG(STOREMM)
1860339ddfaSPaolo Bonzini             YMMREG(STOREYMM)
18791117bc5SPaul Brook             : : "r"(init), "r"(&result), "r"(t->fn)
18891117bc5SPaul Brook             : "memory", "cc",
18991117bc5SPaul Brook             "rsi", "rdi",
19091117bc5SPaul Brook             "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
19191117bc5SPaul Brook             "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
1920339ddfaSPaolo Bonzini             "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5",
1930339ddfaSPaolo Bonzini             "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11",
1940339ddfaSPaolo Bonzini             "ymm12", "ymm13", "ymm14", "ymm15"
19591117bc5SPaul Brook             );
19691117bc5SPaul Brook     compare_state(init, &result);
19791117bc5SPaul Brook }
19891117bc5SPaul Brook 
19991117bc5SPaul Brook #define TEST(n, cmd, type) \
20091117bc5SPaul Brook static void __attribute__((naked)) test_##n(void) \
20191117bc5SPaul Brook { \
20291117bc5SPaul Brook     asm volatile(cmd); \
20391117bc5SPaul Brook     asm volatile("ret"); \
20491117bc5SPaul Brook }
20591117bc5SPaul Brook #include "test-avx.h"
20691117bc5SPaul Brook 
20791117bc5SPaul Brook 
20891117bc5SPaul Brook static const TestDef test_table[] = {
20991117bc5SPaul Brook #define TEST(n, cmd, type) {n, test_##n, cmd, &init##type},
21091117bc5SPaul Brook #include "test-avx.h"
21191117bc5SPaul Brook     {-1, NULL, "", NULL}
21291117bc5SPaul Brook };
21391117bc5SPaul Brook 
21491117bc5SPaul Brook static void run_all(void)
21591117bc5SPaul Brook {
21691117bc5SPaul Brook     const TestDef *t;
21791117bc5SPaul Brook     for (t = test_table; t->fn; t++) {
21891117bc5SPaul Brook         run_test(t);
21991117bc5SPaul Brook     }
22091117bc5SPaul Brook }
22191117bc5SPaul Brook 
/*
 * Number of elements in a true array (not a pointer).  The argument is
 * parenthesised before indexing so any expression can be passed safely.
 */
#define ARRAY_LEN(x) (sizeof(x) / sizeof((x)[0]))

22491117bc5SPaul Brook float val_f32[] = {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5, 8.3};
22591117bc5SPaul Brook double val_f64[] = {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5};
2260339ddfaSPaolo Bonzini v4di val_i64[] = {
2270339ddfaSPaolo Bonzini     {0x3d6b3b6a9e4118f2lu, 0x355ae76d2774d78clu,
2280339ddfaSPaolo Bonzini      0xac3ff76c4daa4b28lu, 0xe7fabd204cb54083lu},
2290339ddfaSPaolo Bonzini     {0xd851c54a56bf1f29lu, 0x4a84d1d50bf4c4fflu,
2300339ddfaSPaolo Bonzini      0x56621e553d52b56clu, 0xd0069553da8f584alu},
2310339ddfaSPaolo Bonzini     {0x5826475e2c5fd799lu, 0xfd32edc01243f5e9lu,
2320339ddfaSPaolo Bonzini      0x738ba2c66d3fe126lu, 0x5707219c6e6c26b4lu},
23391117bc5SPaul Brook };
23491117bc5SPaul Brook 
2350339ddfaSPaolo Bonzini v4di deadbeef = {0xa5a5a5a5deadbeefull, 0xa5a5a5a5deadbeefull,
2360339ddfaSPaolo Bonzini                  0xa5a5a5a5deadbeefull, 0xa5a5a5a5deadbeefull};
2370339ddfaSPaolo Bonzini v4di indexq = {0x000000000000001full, 0x000000000000008full,
2380339ddfaSPaolo Bonzini                0xffffffffffffffffull, 0xffffffffffffff5full};
2390339ddfaSPaolo Bonzini v4di indexd = {0x00000002000000efull, 0xfffffff500000010ull,
2400339ddfaSPaolo Bonzini                0x0000000afffffff0ull, 0x000000000000000eull};
24191117bc5SPaul Brook 
2420339ddfaSPaolo Bonzini v4di gather_mem[0x20];
2430339ddfaSPaolo Bonzini 
2440339ddfaSPaolo Bonzini void init_f32reg(v4di *r)
24591117bc5SPaul Brook {
24691117bc5SPaul Brook     static int n;
2470339ddfaSPaolo Bonzini     float v[8];
24891117bc5SPaul Brook     int i;
2490339ddfaSPaolo Bonzini     for (i = 0; i < 8; i++) {
25091117bc5SPaul Brook         v[i] = val_f32[n++];
25191117bc5SPaul Brook         if (n == ARRAY_LEN(val_f32)) {
25291117bc5SPaul Brook             n = 0;
25391117bc5SPaul Brook         }
25491117bc5SPaul Brook     }
25591117bc5SPaul Brook     memcpy(r, v, sizeof(*r));
25691117bc5SPaul Brook }
25791117bc5SPaul Brook 
2580339ddfaSPaolo Bonzini void init_f64reg(v4di *r)
25991117bc5SPaul Brook {
26091117bc5SPaul Brook     static int n;
2610339ddfaSPaolo Bonzini     double v[4];
26291117bc5SPaul Brook     int i;
2630339ddfaSPaolo Bonzini     for (i = 0; i < 4; i++) {
26491117bc5SPaul Brook         v[i] = val_f64[n++];
26591117bc5SPaul Brook         if (n == ARRAY_LEN(val_f64)) {
26691117bc5SPaul Brook             n = 0;
26791117bc5SPaul Brook         }
26891117bc5SPaul Brook     }
26991117bc5SPaul Brook     memcpy(r, v, sizeof(*r));
27091117bc5SPaul Brook }
27191117bc5SPaul Brook 
2720339ddfaSPaolo Bonzini void init_intreg(v4di *r)
27391117bc5SPaul Brook {
27491117bc5SPaul Brook     static uint64_t mask;
27591117bc5SPaul Brook     static int n;
27691117bc5SPaul Brook 
27791117bc5SPaul Brook     r->q0 = val_i64[n].q0 ^ mask;
27891117bc5SPaul Brook     r->q1 = val_i64[n].q1 ^ mask;
2790339ddfaSPaolo Bonzini     r->q2 = val_i64[n].q2 ^ mask;
2800339ddfaSPaolo Bonzini     r->q3 = val_i64[n].q3 ^ mask;
28191117bc5SPaul Brook     n++;
28291117bc5SPaul Brook     if (n == ARRAY_LEN(val_i64)) {
28391117bc5SPaul Brook         n = 0;
28491117bc5SPaul Brook         mask *= 0x104C11DB7;
28591117bc5SPaul Brook     }
28691117bc5SPaul Brook }
28791117bc5SPaul Brook 
28891117bc5SPaul Brook static void init_all(reg_state *s)
28991117bc5SPaul Brook {
29091117bc5SPaul Brook     int i;
29191117bc5SPaul Brook 
29291117bc5SPaul Brook     s->r[3] = (uint64_t)&s->mem[0]; /* rdx */
2930339ddfaSPaolo Bonzini     s->r[4] = (uint64_t)&gather_mem[ARRAY_LEN(gather_mem) / 2]; /* rsi */
29491117bc5SPaul Brook     s->r[5] = (uint64_t)&s->mem[2]; /* rdi */
29591117bc5SPaul Brook     s->flags = 2;
2960339ddfaSPaolo Bonzini     for (i = 0; i < 16; i++) {
2970339ddfaSPaolo Bonzini         s->ymm[i] = deadbeef;
29891117bc5SPaul Brook     }
2990339ddfaSPaolo Bonzini     s->ymm[13] = indexd;
3000339ddfaSPaolo Bonzini     s->ymm[14] = indexq;
3010339ddfaSPaolo Bonzini     for (i = 0; i < 4; i++) {
30291117bc5SPaul Brook         s->mem0[i] = deadbeef;
30391117bc5SPaul Brook     }
30491117bc5SPaul Brook }
30591117bc5SPaul Brook 
30691117bc5SPaul Brook int main(int argc, char *argv[])
30791117bc5SPaul Brook {
3080339ddfaSPaolo Bonzini     int i;
3090339ddfaSPaolo Bonzini 
31091117bc5SPaul Brook     init_all(&initI);
3110339ddfaSPaolo Bonzini     init_intreg(&initI.ymm[10]);
3120339ddfaSPaolo Bonzini     init_intreg(&initI.ymm[11]);
3130339ddfaSPaolo Bonzini     init_intreg(&initI.ymm[12]);
31491117bc5SPaul Brook     init_intreg(&initI.mem0[1]);
31591117bc5SPaul Brook     printf("Int:\n");
31691117bc5SPaul Brook     dump_regs(&initI);
31791117bc5SPaul Brook 
31891117bc5SPaul Brook     init_all(&initF32);
3190339ddfaSPaolo Bonzini     init_f32reg(&initF32.ymm[10]);
3200339ddfaSPaolo Bonzini     init_f32reg(&initF32.ymm[11]);
3210339ddfaSPaolo Bonzini     init_f32reg(&initF32.ymm[12]);
32291117bc5SPaul Brook     init_f32reg(&initF32.mem0[1]);
32391117bc5SPaul Brook     initF32.ff = 32;
32491117bc5SPaul Brook     printf("F32:\n");
32591117bc5SPaul Brook     dump_regs(&initF32);
32691117bc5SPaul Brook 
32791117bc5SPaul Brook     init_all(&initF64);
3280339ddfaSPaolo Bonzini     init_f64reg(&initF64.ymm[10]);
3290339ddfaSPaolo Bonzini     init_f64reg(&initF64.ymm[11]);
3300339ddfaSPaolo Bonzini     init_f64reg(&initF64.ymm[12]);
33191117bc5SPaul Brook     init_f64reg(&initF64.mem0[1]);
33291117bc5SPaul Brook     initF64.ff = 64;
33391117bc5SPaul Brook     printf("F64:\n");
33491117bc5SPaul Brook     dump_regs(&initF64);
33591117bc5SPaul Brook 
3360339ddfaSPaolo Bonzini     for (i = 0; i < ARRAY_LEN(gather_mem); i++) {
3370339ddfaSPaolo Bonzini         init_intreg(&gather_mem[i]);
3380339ddfaSPaolo Bonzini     }
3390339ddfaSPaolo Bonzini 
34091117bc5SPaul Brook     if (argc > 1) {
34191117bc5SPaul Brook         int n = atoi(argv[1]);
34291117bc5SPaul Brook         run_test(&test_table[n]);
34391117bc5SPaul Brook     } else {
34491117bc5SPaul Brook         run_all();
34591117bc5SPaul Brook     }
34691117bc5SPaul Brook     return 0;
34791117bc5SPaul Brook }
348