; RUN: llc -O3 -disable-peephole -mcpu=corei7-avx -mattr=+avx < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests - we use the 'big vectors' pattern to guarantee spilling to stack.
;
; Many of these tests are primarily to check memory folding with specific instructions. Using a basic
; load/cvt/store pattern to test for this would mean that it wouldn't be the memory folding code that's
; being tested - the load-execute version of the instruction from the tables would be matched instead.

; The oversized <64 x double> operands force the register allocator to spill; the CHECK line
; then verifies the arithmetic op folded its reload directly from the stack slot.
define void @stack_fold_vmulpd(<64 x double>* %a, <64 x double>* %b, <64 x double>* %c) {
  ;CHECK-LABEL: stack_fold_vmulpd
  ;CHECK: vmulpd {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload

  %1 = load <64 x double>* %a
  %2 = load <64 x double>* %b
  %3 = fadd <64 x double> %1, %2
  %4 = fsub <64 x double> %1, %2
  %5 = fmul <64 x double> %3, %4
  store <64 x double> %5, <64 x double>* %c
  ret void
}

; Integer->float conversion: and/xor keep both inputs live so the cvt source must spill.
define void @stack_fold_cvtdq2ps(<128 x i32>* %a, <128 x i32>* %b, <128 x float>* %c) {
  ;CHECK-LABEL: stack_fold_cvtdq2ps
  ;CHECK: vcvtdq2ps {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload

  %1 = load <128 x i32>* %a
  %2 = load <128 x i32>* %b
  %3 = and <128 x i32> %1, %2
  %4 = xor <128 x i32> %1, %2
  %5 = sitofp <128 x i32> %3 to <128 x float>
  %6 = sitofp <128 x i32> %4 to <128 x float>
  %7 = fadd <128 x float> %5, %6
  store <128 x float> %7, <128 x float>* %c
  ret void
}

; Double->float truncation: the 'y' suffix form narrows a 256-bit source to a 128-bit result,
; hence the folded reload lands in an xmm destination.
define void @stack_fold_cvtpd2ps(<128 x double>* %a, <128 x double>* %b, <128 x float>* %c) {
  ;CHECK-LABEL: stack_fold_cvtpd2ps
  ;CHECK: vcvtpd2psy {{[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload

  %1 = load <128 x double>* %a
  %2 = load <128 x double>* %b
  %3 = fadd <128 x double> %1, %2
  %4 = fsub <128 x double> %1, %2
  %5 = fptrunc <128 x double> %3 to <128 x float>
  %6 = fptrunc <128 x double> %4 to <128 x float>
  %7 = fadd <128 x float> %5, %6
  store <128 x float> %7, <128 x float>* %c
  ret void
}

; Truncating double->i32 conversion; #0 pins "unsafe-fp-math"="false" so fptosi keeps
; exact truncation semantics and must select the vcvttpd2dq (truncating) form.
define void @stack_fold_cvttpd2dq(<64 x double>* %a, <64 x double>* %b, <64 x i32>* %c) #0 {
  ;CHECK-LABEL: stack_fold_cvttpd2dq
  ;CHECK: vcvttpd2dqy {{[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload

  %1 = load <64 x double>* %a
  %2 = load <64 x double>* %b
  %3 = fadd <64 x double> %1, %2
  %4 = fsub <64 x double> %1, %2
  %5 = fptosi <64 x double> %3 to <64 x i32>
  %6 = fptosi <64 x double> %4 to <64 x i32>
  %7 = or <64 x i32> %5, %6
  store <64 x i32> %7, <64 x i32>* %c
  ret void
}

; Truncating float->i32 conversion, 256-bit form (ymm source and destination).
define void @stack_fold_cvttps2dq(<128 x float>* %a, <128 x float>* %b, <128 x i32>* %c) #0 {
  ;CHECK-LABEL: stack_fold_cvttps2dq
  ;CHECK: vcvttps2dq {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload

  %1 = load <128 x float>* %a
  %2 = load <128 x float>* %b
  %3 = fadd <128 x float> %1, %2
  %4 = fsub <128 x float> %1, %2
  %5 = fptosi <128 x float> %3 to <128 x i32>
  %6 = fptosi <128 x float> %4 to <128 x i32>
  %7 = or <128 x i32> %5, %6
  store <128 x i32> %7, <128 x i32>* %c
  ret void
}

; Fix: attribute group #0 is referenced by the two cvtt tests above but was never
; defined, which makes the whole file fail to parse. Define it here.
attributes #0 = { "unsafe-fp-math"="false" }