; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -early-cse < %s | FileCheck %s

; Unequal mask check.

; Load-load: with unequal masks, the second load can be removed if the value it
; produces is a subset of the first loaded value over the non-undef vector
; elements. In other words, the second mask must be a submask of the first one,
; and the pass-through value of the second load must be undef.
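; For the masks used below, <1,0,0,1> is a submask of <1,1,0,1>: every lane
; enabled in the former is also enabled in the latter. A mask that enables a
; lane the other mask disables (e.g. lane 2 in <1,0,1,0> vs. <1,1,0,1>) is not
; a submask of it.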

; Load-load, second mask is a submask of the first, second pass-through is undef.
; Expect the second load to be removed.
define <4 x i32> @f3(<4 x i32>* %a0, <4 x i32> %a1) {
; CHECK-LABEL: @f3(
; CHECK-NEXT:    [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[V2:%.*]] = add <4 x i32> [[V0]], [[V0]]
; CHECK-NEXT:    ret <4 x i32> [[V2]]
;
  %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %a1)
  %v1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> undef)
  %v2 = add <4 x i32> %v0, %v1
  ret <4 x i32> %v2
}

; Load-load, second mask is a submask of the first, second pass-through is not undef.
; Expect the second load to remain.
define <4 x i32> @f4(<4 x i32>* %a0, <4 x i32> %a1) {
; CHECK-LABEL: @f4(
; CHECK-NEXT:    [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[V1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[A0]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer)
; CHECK-NEXT:    [[V2:%.*]] = add <4 x i32> [[V0]], [[V1]]
; CHECK-NEXT:    ret <4 x i32> [[V2]]
;
  %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %a1)
  %v1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer)
  %v2 = add <4 x i32> %v0, %v1
  ret <4 x i32> %v2
}

; Load-load, second mask is not a submask of the first, second pass-through is undef.
; Expect the second load to remain.
define <4 x i32> @f5(<4 x i32>* %a0, <4 x i32> %a1) {
; CHECK-LABEL: @f5(
; CHECK-NEXT:    [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[V1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[A0]], i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> undef)
; CHECK-NEXT:    [[V2:%.*]] = add <4 x i32> [[V0]], [[V1]]
; CHECK-NEXT:    ret <4 x i32> [[V2]]
;
  %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %a1)
  %v1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> undef)
  %v2 = add <4 x i32> %v0, %v1
  ret <4 x i32> %v2
}

; Store-store: the first store can be removed if the first mask is a submask
; of the second mask.
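; This holds because every lane written by the first store is overwritten by
; the second store before it can be observed.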

; Store-store, first mask is a submask of the second.
; Expect the first store to be removed.
define void @f6(<4 x i32> %a0, <4 x i32>* %a1) {
; CHECK-LABEL: @f6(
; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[A0:%.*]], <4 x i32>* [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
; CHECK-NEXT:    ret void
;
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a0, <4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>)
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a0, <4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
  ret void
}

; Store-store, first mask is not a submask of the second.
; Expect both stores to remain.
define void @f7(<4 x i32> %a0, <4 x i32>* %a1) {
; CHECK-LABEL: @f7(
; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[A0:%.*]], <4 x i32>* [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[A0]], <4 x i32>* [[A1]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>)
; CHECK-NEXT:    ret void
;
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a0, <4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a0, <4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>)
  ret void
}

; Load-store: the store can be removed if the store's mask is a submask of the
; load's mask.
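; In that case the store only writes back, to a subset of the loaded lanes, the
; values that were just loaded from those same lanes, leaving memory unchanged.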

; Load-store, second mask is a submask of the first.
; Expect the store to be removed.
define <4 x i32> @f8(<4 x i32>* %a0, <4 x i32> %a1) {
; CHECK-LABEL: @f8(
; CHECK-NEXT:    [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    ret <4 x i32> [[V0]]
;
  %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %a1)
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v0, <4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>)
  ret <4 x i32> %v0
}

; Load-store, second mask is not a submask of the first.
; Expect the store to remain.
define <4 x i32> @f9(<4 x i32>* %a0, <4 x i32> %a1) {
; CHECK-LABEL: @f9(
; CHECK-NEXT:    [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[V0]], <4 x i32>* [[A0]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
; CHECK-NEXT:    ret <4 x i32> [[V0]]
;
  %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a1)
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v0, <4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
  ret <4 x i32> %v0
}

; Store-load: the load can be removed if the load's mask is a submask of the
; store's mask, and the load's pass-through value is undef.
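; The load then reads only lanes that the store just wrote, so the stored value
; can be forwarded; the undef pass-through covers the lanes the load masks off.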

; Store-load, load's mask is a submask of the store's mask, pass-through is undef.
; Expect the load to be removed.
define <4 x i32> @fa(<4 x i32> %a0, <4 x i32>* %a1) {
; CHECK-LABEL: @fa(
; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[A0:%.*]], <4 x i32>* [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
; CHECK-NEXT:    ret <4 x i32> [[A0]]
;
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a0, <4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
  %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> undef)
  ret <4 x i32> %v0
}

; Store-load, load's mask is a submask of the store's mask, pass-through is not undef.
; Expect the load to remain.
define <4 x i32> @fb(<4 x i32> %a0, <4 x i32>* %a1) {
; CHECK-LABEL: @fb(
; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[A0:%.*]], <4 x i32>* [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
; CHECK-NEXT:    [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[A1]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer)
; CHECK-NEXT:    ret <4 x i32> [[V0]]
;
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a0, <4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
  %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer)
  ret <4 x i32> %v0
}

; Store-load, load's mask is not a submask of the store's mask, pass-through is undef.
; Expect the load to remain.
define <4 x i32> @fc(<4 x i32> %a0, <4 x i32>* %a1) {
; CHECK-LABEL: @fc(
; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[A0:%.*]], <4 x i32>* [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>)
; CHECK-NEXT:    [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[A1]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> undef)
; CHECK-NEXT:    ret <4 x i32> [[V0]]
;
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a0, <4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>)
  %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> undef)
  ret <4 x i32> %v0
}

declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)