; RUN: llc -O3 -disable-peephole -mcpu=corei7-avx -mattr=+avx < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests - we use the 'big vectors' pattern to guarantee spilling to stack.
;
; Many of these tests are primarily to check memory folding with specific instructions. Using a basic
; load/cvt/store pattern to test for this would not exercise the memory folding code - the
; load-execute version of the instruction would be matched during instruction selection instead.
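;
; For contrast, a minimal sketch of the pattern we avoid (hypothetical function; not part
; of this test): with a single small-vector conversion, instruction selection would match
; the load-execute form directly (e.g. 'vcvtdq2ps (%rdi), %ymm0') and the stack folding
; code would never run:
;
;   define void @naive_cvtdq2ps(<8 x i32>* %a, <8 x float>* %c) {
;     %1 = load <8 x i32>* %a
;     %2 = sitofp <8 x i32> %1 to <8 x float>
;     store <8 x float> %2, <8 x float>* %c
;     ret void
;   }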
define void @stack_fold_vmulpd(<64 x double>* %a, <64 x double>* %b, <64 x double>* %c) {
  ;CHECK-LABEL: stack_fold_vmulpd
  ;CHECK:       vmulpd {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
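  ; Each <64 x double> operand is 512 bytes (sixteen 256-bit YMM registers), so keeping
  ; %1 and %2 live across both the fadd and the fsub exceeds the sixteen YMM registers
  ; available, forcing spills; the fmul should then fold a 32-byte reload from the stack.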
  %1 = load <64 x double>* %a
  %2 = load <64 x double>* %b
  %3 = fadd <64 x double> %1, %2
  %4 = fsub <64 x double> %1, %2
  %5 = fmul <64 x double> %3, %4
  store <64 x double> %5, <64 x double>* %c
  ret void
}

define void @stack_fold_cvtdq2ps(<128 x i32>* %a, <128 x i32>* %b, <128 x float>* %c) {
  ;CHECK-LABEL: stack_fold_cvtdq2ps
  ;CHECK:   vcvtdq2ps {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload

  %1 = load <128 x i32>* %a
  %2 = load <128 x i32>* %b
  %3 = and <128 x i32> %1, %2
  %4 = xor <128 x i32> %1, %2
  %5 = sitofp <128 x i32> %3 to <128 x float>
  %6 = sitofp <128 x i32> %4 to <128 x float>
  %7 = fadd <128 x float> %5, %6
  store <128 x float> %7, <128 x float>* %c
  ret void
}

define void @stack_fold_cvtpd2ps(<128 x double>* %a, <128 x double>* %b, <128 x float>* %c) {
  ;CHECK-LABEL: stack_fold_cvtpd2ps
  ;CHECK:   vcvtpd2psy {{[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
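  ; Note: the 'y' suffix (vcvtpd2psy here, vcvttpd2dqy below) is the AT&T mnemonic for
  ; the 256-bit memory-source form - both the 128-bit and 256-bit forms write an %xmm
  ; register, so the plain mnemonic would be ambiguous with a memory operand.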
  %1 = load <128 x double>* %a
  %2 = load <128 x double>* %b
  %3 = fadd <128 x double> %1, %2
  %4 = fsub <128 x double> %1, %2
  %5 = fptrunc <128 x double> %3 to <128 x float>
  %6 = fptrunc <128 x double> %4 to <128 x float>
  %7 = fadd <128 x float> %5, %6
  store <128 x float> %7, <128 x float>* %c
  ret void
}

define void @stack_fold_cvttpd2dq(<64 x double>* %a, <64 x double>* %b, <64 x i32>* %c) {
  ;CHECK-LABEL: stack_fold_cvttpd2dq
  ;CHECK:  vcvttpd2dqy {{[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload

  %1 = load <64 x double>* %a
  %2 = load <64 x double>* %b
  %3 = fadd <64 x double> %1, %2
  %4 = fsub <64 x double> %1, %2
  %5 = fptosi <64 x double> %3 to <64 x i32>
  %6 = fptosi <64 x double> %4 to <64 x i32>
  %7 = or <64 x i32> %5, %6
  store <64 x i32> %7, <64 x i32>* %c
  ret void
}

define void @stack_fold_cvttps2dq(<128 x float>* %a, <128 x float>* %b, <128 x i32>* %c) {
  ;CHECK-LABEL: stack_fold_cvttps2dq
  ;CHECK:   vcvttps2dq {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload

  %1 = load <128 x float>* %a
  %2 = load <128 x float>* %b
  %3 = fadd <128 x float> %1, %2
  %4 = fsub <128 x float> %1, %2
  %5 = fptosi <128 x float> %3 to <128 x i32>
  %6 = fptosi <128 x float> %4 to <128 x i32>
  %7 = or <128 x i32> %5, %6
  store <128 x i32> %7, <128 x i32>* %c
  ret void
}