#include <inttypes.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* A generated test body: takes no arguments, returns nothing. */
typedef void (*testfn)(void);

/* One 256-bit vector value as four 64-bit lanes; q0 is the lowest lane. */
typedef struct {
    uint64_t q0, q1, q2, q3;
} __attribute__((aligned(32))) v4di;

/*
 * Snapshot of the register state loaded before / stored after a test.
 *
 * The inline assembly in this file addresses the fields by hard-coded
 * byte offsets (see the MMREG, YMMREG and REG tables), so the order and
 * size of the members must not change without updating those tables.
 */
typedef struct {
    uint64_t mm[8];     /* mm0..mm7 */
    v4di ymm[16];       /* ymm0..ymm15 */
    uint64_t r[16];     /* general purpose registers, r[0] is rax */
    uint64_t flags;     /* only the low 8 bits are loaded and stored */
    uint32_t ff;        /* float format for dumps: 32, 64, or 0 for none */
    uint64_t pad;
    v4di mem[4];        /* scratch memory the test may modify */
    v4di mem0[4];       /* pristine copy that mem is compared against */
} reg_state;

/* One entry of the test table. */
typedef struct {
    int n;              /* test number, printed before the test runs */
    testfn fn;          /* code under test; NULL marks the end of a table */
    const char *s;      /* instruction text, printed before the test runs */
    reg_state *init;    /* initial state to load for this test */
} TestDef;

reg_state initI;
reg_state initF32;
reg_state initF64;

/*
 * Print one 256-bit value as four hex quadwords, highest lane first.
 *
 * name, n: label and index printed in front of the value.
 * r:       value to print.
 * ff:      64 additionally prints the value as four doubles, 32 as eight
 *          floats; any other value prints the hex line only.
 */
static void dump_ymm(const char *name, int n, const v4di *r, int ff)
{
    /* PRIx64 keeps the format correct wherever uint64_t is not 'long'. */
    printf("%s%d = %016" PRIx64 " %016" PRIx64
           " %016" PRIx64 " %016" PRIx64 "\n",
           name, n, r->q3, r->q2, r->q1, r->q0);
    if (ff == 64) {
        double v[4];
        /* memcpy rather than a pointer cast, to avoid aliasing problems. */
        memcpy(v, r, sizeof(v));
        printf(" %16g %16g %16g %16g\n",
               v[3], v[2], v[1], v[0]);
    } else if (ff == 32) {
        float v[8];
        memcpy(v, r, sizeof(v));
        printf(" %8g %8g %8g %8g %8g %8g %8g %8g\n",
               v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]);
    }
}

/* Print every ymm register and every mem0 slot of s, in hex only. */
static void dump_regs(const reg_state *s)
{
    int i;

    for (i = 0; i < 16; i++) {
        dump_ymm("ymm", i, &s->ymm[i], 0);
    }
    for (i = 0; i < 4; i++) {
        dump_ymm("mem", i, &s->mem0[i], 0);
    }
}

/*
 * Print every piece of state in which b differs from a.
 *
 * a: state before the test; its ff selects the float format for dumps.
 * b: state captured after the test.
 */
static void compare_state(const reg_state *a, const reg_state *b)
{
    int i;

    for (i = 0; i < 8; i++) {
        if (a->mm[i] != b->mm[i]) {
            printf("MM%d = %016" PRIx64 "\n", i, b->mm[i]);
        }
    }
    for (i = 0; i < 16; i++) {
        if (a->r[i] != b->r[i]) {
            printf("r%d = %016" PRIx64 "\n", i, b->r[i]);
        }
    }
    for (i = 0; i < 16; i++) {
        if (memcmp(&a->ymm[i], &b->ymm[i], 32)) {
            dump_ymm("ymm", i, &b->ymm[i], a->ff);
        }
    }
    /*
     * Memory is checked inside 'a' itself: mem is compared against the
     * pristine mem0 copy, and b's memory fields are never read here.
     */
    for (i = 0; i < 4; i++) {
        if (memcmp(&a->mem0[i], &a->mem[i], 32)) {
            dump_ymm("mem", i, &a->mem[i], a->ff);
        }
    }
    if (a->flags != b->flags) {
        printf("FLAGS = %016" PRIx64 "\n", b->flags);
    }
}

/*
 * Assembly fragments, one instruction each, for moving register r to or
 * from byte offset o of a reg_state.  LOAD* read through asm operand %0,
 * STORE* write through asm operand %1.
 */
#define LOADMM(r, o) "movq " #r ", " #o "[%0]\n\t"
#define LOADYMM(r, o) "vmovdqa " #r ", " #o "[%0]\n\t"
#define STOREMM(r, o) "movq " #o "[%1], " #r "\n\t"
#define STOREYMM(r, o) "vmovdqa " #o "[%1], " #r "\n\t"
#define MMREG(F) \ 9691117bc5SPaul Brook F(mm0, 0x00) \ 9791117bc5SPaul Brook F(mm1, 0x08) \ 9891117bc5SPaul Brook F(mm2, 0x10) \ 9991117bc5SPaul Brook F(mm3, 0x18) \ 10091117bc5SPaul Brook F(mm4, 0x20) \ 10191117bc5SPaul Brook F(mm5, 0x28) \ 10291117bc5SPaul Brook F(mm6, 0x30) \ 10391117bc5SPaul Brook F(mm7, 0x38) 1040339ddfaSPaolo Bonzini #define YMMREG(F) \ 1050339ddfaSPaolo Bonzini F(ymm0, 0x040) \ 1060339ddfaSPaolo Bonzini F(ymm1, 0x060) \ 1070339ddfaSPaolo Bonzini F(ymm2, 0x080) \ 1080339ddfaSPaolo Bonzini F(ymm3, 0x0a0) \ 1090339ddfaSPaolo Bonzini F(ymm4, 0x0c0) \ 1100339ddfaSPaolo Bonzini F(ymm5, 0x0e0) \ 1110339ddfaSPaolo Bonzini F(ymm6, 0x100) \ 1120339ddfaSPaolo Bonzini F(ymm7, 0x120) \ 1130339ddfaSPaolo Bonzini F(ymm8, 0x140) \ 1140339ddfaSPaolo Bonzini F(ymm9, 0x160) \ 1150339ddfaSPaolo Bonzini F(ymm10, 0x180) \ 1160339ddfaSPaolo Bonzini F(ymm11, 0x1a0) \ 1170339ddfaSPaolo Bonzini F(ymm12, 0x1c0) \ 1180339ddfaSPaolo Bonzini F(ymm13, 0x1e0) \ 1190339ddfaSPaolo Bonzini F(ymm14, 0x200) \ 1200339ddfaSPaolo Bonzini F(ymm15, 0x220) 12191117bc5SPaul Brook #define LOADREG(r, o) "mov " #r ", " #o "[rax]\n\t" 12291117bc5SPaul Brook #define STOREREG(r, o) "mov " #o "[rax], " #r "\n\t" 12391117bc5SPaul Brook #define REG(F) \ 1240339ddfaSPaolo Bonzini F(rbx, 0x248) \ 1250339ddfaSPaolo Bonzini F(rcx, 0x250) \ 1260339ddfaSPaolo Bonzini F(rdx, 0x258) \ 1270339ddfaSPaolo Bonzini F(rsi, 0x260) \ 1280339ddfaSPaolo Bonzini F(rdi, 0x268) \ 1290339ddfaSPaolo Bonzini F(r8, 0x280) \ 1300339ddfaSPaolo Bonzini F(r9, 0x288) \ 1310339ddfaSPaolo Bonzini F(r10, 0x290) \ 1320339ddfaSPaolo Bonzini F(r11, 0x298) \ 1330339ddfaSPaolo Bonzini F(r12, 0x2a0) \ 1340339ddfaSPaolo Bonzini F(r13, 0x2a8) \ 1350339ddfaSPaolo Bonzini F(r14, 0x2b0) \ 1360339ddfaSPaolo Bonzini F(r15, 0x2b8) \ 13791117bc5SPaul Brook 13891117bc5SPaul Brook static void run_test(const TestDef *t) 13991117bc5SPaul Brook { 14091117bc5SPaul Brook reg_state result; 14191117bc5SPaul Brook reg_state *init = t->init; 
14291117bc5SPaul Brook memcpy(init->mem, init->mem0, sizeof(init->mem)); 14391117bc5SPaul Brook printf("%5d %s\n", t->n, t->s); 14491117bc5SPaul Brook asm volatile( 14591117bc5SPaul Brook MMREG(LOADMM) 1460339ddfaSPaolo Bonzini YMMREG(LOADYMM) 14791117bc5SPaul Brook "sub rsp, 128\n\t" 14891117bc5SPaul Brook "push rax\n\t" 14991117bc5SPaul Brook "push rbx\n\t" 15091117bc5SPaul Brook "push rcx\n\t" 15191117bc5SPaul Brook "push rdx\n\t" 15291117bc5SPaul Brook "push %1\n\t" 15391117bc5SPaul Brook "push %2\n\t" 15491117bc5SPaul Brook "mov rax, %0\n\t" 15591117bc5SPaul Brook "pushf\n\t" 15691117bc5SPaul Brook "pop rbx\n\t" 15791117bc5SPaul Brook "shr rbx, 8\n\t" 15891117bc5SPaul Brook "shl rbx, 8\n\t" 1590339ddfaSPaolo Bonzini "mov rcx, 0x2c0[rax]\n\t" 16091117bc5SPaul Brook "and rcx, 0xff\n\t" 16191117bc5SPaul Brook "or rbx, rcx\n\t" 16291117bc5SPaul Brook "push rbx\n\t" 16391117bc5SPaul Brook "popf\n\t" 16491117bc5SPaul Brook REG(LOADREG) 1650339ddfaSPaolo Bonzini "mov rax, 0x240[rax]\n\t" 16691117bc5SPaul Brook "call [rsp]\n\t" 16791117bc5SPaul Brook "mov [rsp], rax\n\t" 16891117bc5SPaul Brook "mov rax, 8[rsp]\n\t" 16991117bc5SPaul Brook REG(STOREREG) 17091117bc5SPaul Brook "mov rbx, [rsp]\n\t" 1710339ddfaSPaolo Bonzini "mov 0x240[rax], rbx\n\t" 17291117bc5SPaul Brook "mov rbx, 0\n\t" 1730339ddfaSPaolo Bonzini "mov 0x270[rax], rbx\n\t" 1740339ddfaSPaolo Bonzini "mov 0x278[rax], rbx\n\t" 17591117bc5SPaul Brook "pushf\n\t" 17691117bc5SPaul Brook "pop rbx\n\t" 17791117bc5SPaul Brook "and rbx, 0xff\n\t" 1780339ddfaSPaolo Bonzini "mov 0x2c0[rax], rbx\n\t" 17991117bc5SPaul Brook "add rsp, 16\n\t" 18091117bc5SPaul Brook "pop rdx\n\t" 18191117bc5SPaul Brook "pop rcx\n\t" 18291117bc5SPaul Brook "pop rbx\n\t" 18391117bc5SPaul Brook "pop rax\n\t" 18491117bc5SPaul Brook "add rsp, 128\n\t" 18591117bc5SPaul Brook MMREG(STOREMM) 1860339ddfaSPaolo Bonzini YMMREG(STOREYMM) 18791117bc5SPaul Brook : : "r"(init), "r"(&result), "r"(t->fn) 18891117bc5SPaul Brook : "memory", "cc", 
18991117bc5SPaul Brook "rsi", "rdi", 19091117bc5SPaul Brook "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", 19191117bc5SPaul Brook "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", 1920339ddfaSPaolo Bonzini "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", 1930339ddfaSPaolo Bonzini "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", 1940339ddfaSPaolo Bonzini "ymm12", "ymm13", "ymm14", "ymm15" 19591117bc5SPaul Brook ); 19691117bc5SPaul Brook compare_state(init, &result); 19791117bc5SPaul Brook } 19891117bc5SPaul Brook 19991117bc5SPaul Brook #define TEST(n, cmd, type) \ 20091117bc5SPaul Brook static void __attribute__((naked)) test_##n(void) \ 20191117bc5SPaul Brook { \ 20291117bc5SPaul Brook asm volatile(cmd); \ 20391117bc5SPaul Brook asm volatile("ret"); \ 20491117bc5SPaul Brook } 20591117bc5SPaul Brook #include "test-avx.h" 20691117bc5SPaul Brook 20791117bc5SPaul Brook 20891117bc5SPaul Brook static const TestDef test_table[] = { 20991117bc5SPaul Brook #define TEST(n, cmd, type) {n, test_##n, cmd, &init##type}, 21091117bc5SPaul Brook #include "test-avx.h" 21191117bc5SPaul Brook {-1, NULL, "", NULL} 21291117bc5SPaul Brook }; 21391117bc5SPaul Brook 21491117bc5SPaul Brook static void run_all(void) 21591117bc5SPaul Brook { 21691117bc5SPaul Brook const TestDef *t; 21791117bc5SPaul Brook for (t = test_table; t->fn; t++) { 21891117bc5SPaul Brook run_test(t); 21991117bc5SPaul Brook } 22091117bc5SPaul Brook } 22191117bc5SPaul Brook 22291117bc5SPaul Brook #define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0])) 22391117bc5SPaul Brook 22491117bc5SPaul Brook float val_f32[] = {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5, 8.3}; 22591117bc5SPaul Brook double val_f64[] = {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5}; 2260339ddfaSPaolo Bonzini v4di val_i64[] = { 2270339ddfaSPaolo Bonzini {0x3d6b3b6a9e4118f2lu, 0x355ae76d2774d78clu, 2280339ddfaSPaolo Bonzini 0xac3ff76c4daa4b28lu, 0xe7fabd204cb54083lu}, 2290339ddfaSPaolo Bonzini {0xd851c54a56bf1f29lu, 0x4a84d1d50bf4c4fflu, 2300339ddfaSPaolo 
Bonzini 0x56621e553d52b56clu, 0xd0069553da8f584alu}, 2310339ddfaSPaolo Bonzini {0x5826475e2c5fd799lu, 0xfd32edc01243f5e9lu, 2320339ddfaSPaolo Bonzini 0x738ba2c66d3fe126lu, 0x5707219c6e6c26b4lu}, 23391117bc5SPaul Brook }; 23491117bc5SPaul Brook 2350339ddfaSPaolo Bonzini v4di deadbeef = {0xa5a5a5a5deadbeefull, 0xa5a5a5a5deadbeefull, 2360339ddfaSPaolo Bonzini 0xa5a5a5a5deadbeefull, 0xa5a5a5a5deadbeefull}; 2370339ddfaSPaolo Bonzini v4di indexq = {0x000000000000001full, 0x000000000000008full, 2380339ddfaSPaolo Bonzini 0xffffffffffffffffull, 0xffffffffffffff5full}; 2390339ddfaSPaolo Bonzini v4di indexd = {0x00000002000000efull, 0xfffffff500000010ull, 2400339ddfaSPaolo Bonzini 0x0000000afffffff0ull, 0x000000000000000eull}; 24191117bc5SPaul Brook 2420339ddfaSPaolo Bonzini v4di gather_mem[0x20]; 2430339ddfaSPaolo Bonzini 2440339ddfaSPaolo Bonzini void init_f32reg(v4di *r) 24591117bc5SPaul Brook { 24691117bc5SPaul Brook static int n; 2470339ddfaSPaolo Bonzini float v[8]; 24891117bc5SPaul Brook int i; 2490339ddfaSPaolo Bonzini for (i = 0; i < 8; i++) { 25091117bc5SPaul Brook v[i] = val_f32[n++]; 25191117bc5SPaul Brook if (n == ARRAY_LEN(val_f32)) { 25291117bc5SPaul Brook n = 0; 25391117bc5SPaul Brook } 25491117bc5SPaul Brook } 25591117bc5SPaul Brook memcpy(r, v, sizeof(*r)); 25691117bc5SPaul Brook } 25791117bc5SPaul Brook 2580339ddfaSPaolo Bonzini void init_f64reg(v4di *r) 25991117bc5SPaul Brook { 26091117bc5SPaul Brook static int n; 2610339ddfaSPaolo Bonzini double v[4]; 26291117bc5SPaul Brook int i; 2630339ddfaSPaolo Bonzini for (i = 0; i < 4; i++) { 26491117bc5SPaul Brook v[i] = val_f64[n++]; 26591117bc5SPaul Brook if (n == ARRAY_LEN(val_f64)) { 26691117bc5SPaul Brook n = 0; 26791117bc5SPaul Brook } 26891117bc5SPaul Brook } 26991117bc5SPaul Brook memcpy(r, v, sizeof(*r)); 27091117bc5SPaul Brook } 27191117bc5SPaul Brook 2720339ddfaSPaolo Bonzini void init_intreg(v4di *r) 27391117bc5SPaul Brook { 27491117bc5SPaul Brook static uint64_t mask; 27591117bc5SPaul Brook static int 
n; 27691117bc5SPaul Brook 27791117bc5SPaul Brook r->q0 = val_i64[n].q0 ^ mask; 27891117bc5SPaul Brook r->q1 = val_i64[n].q1 ^ mask; 2790339ddfaSPaolo Bonzini r->q2 = val_i64[n].q2 ^ mask; 2800339ddfaSPaolo Bonzini r->q3 = val_i64[n].q3 ^ mask; 28191117bc5SPaul Brook n++; 28291117bc5SPaul Brook if (n == ARRAY_LEN(val_i64)) { 28391117bc5SPaul Brook n = 0; 28491117bc5SPaul Brook mask *= 0x104C11DB7; 28591117bc5SPaul Brook } 28691117bc5SPaul Brook } 28791117bc5SPaul Brook 28891117bc5SPaul Brook static void init_all(reg_state *s) 28991117bc5SPaul Brook { 29091117bc5SPaul Brook int i; 29191117bc5SPaul Brook 29291117bc5SPaul Brook s->r[3] = (uint64_t)&s->mem[0]; /* rdx */ 2930339ddfaSPaolo Bonzini s->r[4] = (uint64_t)&gather_mem[ARRAY_LEN(gather_mem) / 2]; /* rsi */ 29491117bc5SPaul Brook s->r[5] = (uint64_t)&s->mem[2]; /* rdi */ 29591117bc5SPaul Brook s->flags = 2; 2960339ddfaSPaolo Bonzini for (i = 0; i < 16; i++) { 2970339ddfaSPaolo Bonzini s->ymm[i] = deadbeef; 29891117bc5SPaul Brook } 2990339ddfaSPaolo Bonzini s->ymm[13] = indexd; 3000339ddfaSPaolo Bonzini s->ymm[14] = indexq; 3010339ddfaSPaolo Bonzini for (i = 0; i < 4; i++) { 30291117bc5SPaul Brook s->mem0[i] = deadbeef; 30391117bc5SPaul Brook } 30491117bc5SPaul Brook } 30591117bc5SPaul Brook 30691117bc5SPaul Brook int main(int argc, char *argv[]) 30791117bc5SPaul Brook { 3080339ddfaSPaolo Bonzini int i; 3090339ddfaSPaolo Bonzini 31091117bc5SPaul Brook init_all(&initI); 3110339ddfaSPaolo Bonzini init_intreg(&initI.ymm[10]); 3120339ddfaSPaolo Bonzini init_intreg(&initI.ymm[11]); 3130339ddfaSPaolo Bonzini init_intreg(&initI.ymm[12]); 31491117bc5SPaul Brook init_intreg(&initI.mem0[1]); 31591117bc5SPaul Brook printf("Int:\n"); 31691117bc5SPaul Brook dump_regs(&initI); 31791117bc5SPaul Brook 31891117bc5SPaul Brook init_all(&initF32); 3190339ddfaSPaolo Bonzini init_f32reg(&initF32.ymm[10]); 3200339ddfaSPaolo Bonzini init_f32reg(&initF32.ymm[11]); 3210339ddfaSPaolo Bonzini init_f32reg(&initF32.ymm[12]); 32291117bc5SPaul 
Brook init_f32reg(&initF32.mem0[1]); 32391117bc5SPaul Brook initF32.ff = 32; 32491117bc5SPaul Brook printf("F32:\n"); 32591117bc5SPaul Brook dump_regs(&initF32); 32691117bc5SPaul Brook 32791117bc5SPaul Brook init_all(&initF64); 3280339ddfaSPaolo Bonzini init_f64reg(&initF64.ymm[10]); 3290339ddfaSPaolo Bonzini init_f64reg(&initF64.ymm[11]); 3300339ddfaSPaolo Bonzini init_f64reg(&initF64.ymm[12]); 33191117bc5SPaul Brook init_f64reg(&initF64.mem0[1]); 33291117bc5SPaul Brook initF64.ff = 64; 33391117bc5SPaul Brook printf("F64:\n"); 33491117bc5SPaul Brook dump_regs(&initF64); 33591117bc5SPaul Brook 3360339ddfaSPaolo Bonzini for (i = 0; i < ARRAY_LEN(gather_mem); i++) { 3370339ddfaSPaolo Bonzini init_intreg(&gather_mem[i]); 3380339ddfaSPaolo Bonzini } 3390339ddfaSPaolo Bonzini 34091117bc5SPaul Brook if (argc > 1) { 34191117bc5SPaul Brook int n = atoi(argv[1]); 34291117bc5SPaul Brook run_test(&test_table[n]); 34391117bc5SPaul Brook } else { 34491117bc5SPaul Brook run_all(); 34591117bc5SPaul Brook } 34691117bc5SPaul Brook return 0; 34791117bc5SPaul Brook } 348