; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s

; Check propagation of optional IR flags (PR20802). For a flag to
; propagate from scalar instructions to their vector replacement,
; *all* scalar instructions must have the flag.
;
; Every function below has the same shape: load four consecutive
; elements from %x, apply one scalar operation to each, and store the
; results back to the same four slots. Only the flags on the four
; scalar operations differ from function to function.

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Positive case for 'exact': all four scalar lshr instructions carry the
; flag, so the vector lshr is expected to carry it as well.
; CHECK-LABEL: @exact(
; CHECK: lshr exact <4 x i32>
define void @exact(i32* %x) {
  ; Addresses of the four consecutive i32 elements x[0..3].
  %idx1 = getelementptr inbounds i32* %x, i64 0
  %idx2 = getelementptr inbounds i32* %x, i64 1
  %idx3 = getelementptr inbounds i32* %x, i64 2
  %idx4 = getelementptr inbounds i32* %x, i64 3

  %load1 = load i32* %idx1, align 4
  %load2 = load i32* %idx2, align 4
  %load3 = load i32* %idx3, align 4
  %load4 = load i32* %idx4, align 4

  ; Same operation and same flag on every lane.
  %op1 = lshr exact i32 %load1, 1
  %op2 = lshr exact i32 %load2, 1
  %op3 = lshr exact i32 %load3, 1
  %op4 = lshr exact i32 %load4, 1

  ; Results are written back in place.
  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}

; Negative case for 'exact': one of the four scalar lshr instructions
; lacks the flag, so the vector lshr is expected to be emitted without it.
; CHECK-LABEL: @not_exact(
; CHECK: lshr <4 x i32>
define void @not_exact(i32* %x) {
  ; Addresses of the four consecutive i32 elements x[0..3].
  %idx1 = getelementptr inbounds i32* %x, i64 0
  %idx2 = getelementptr inbounds i32* %x, i64 1
  %idx3 = getelementptr inbounds i32* %x, i64 2
  %idx4 = getelementptr inbounds i32* %x, i64 3

  %load1 = load i32* %idx1, align 4
  %load2 = load i32* %idx2, align 4
  %load3 = load i32* %idx3, align 4
  %load4 = load i32* %idx4, align 4

  ; %op2 is the only lane without 'exact'.
  %op1 = lshr exact i32 %load1, 1
  %op2 = lshr i32 %load2, 1
  %op3 = lshr exact i32 %load3, 1
  %op4 = lshr exact i32 %load4, 1

  ; Results are written back in place.
  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}

; Positive case for 'nsw': all four scalar adds carry the flag, so the
; vector add is expected to carry it as well.
; CHECK-LABEL: @nsw(
; CHECK: add nsw <4 x i32>
define void @nsw(i32* %x) {
  ; Addresses of the four consecutive i32 elements x[0..3].
  %idx1 = getelementptr inbounds i32* %x, i64 0
  %idx2 = getelementptr inbounds i32* %x, i64 1
  %idx3 = getelementptr inbounds i32* %x, i64 2
  %idx4 = getelementptr inbounds i32* %x, i64 3

  %load1 = load i32* %idx1, align 4
  %load2 = load i32* %idx2, align 4
  %load3 = load i32* %idx3, align 4
  %load4 = load i32* %idx4, align 4

  ; Same operation and same flag on every lane.
  %op1 = add nsw i32 %load1, 1
  %op2 = add nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = add nsw i32 %load4, 1

  ; Results are written back in place.
  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}

; Negative case for 'nsw': one of the four scalar adds lacks the flag, so
; the vector add is expected to be emitted without it.
; CHECK-LABEL: @not_nsw(
; CHECK: add <4 x i32>
define void @not_nsw(i32* %x) {
  ; Addresses of the four consecutive i32 elements x[0..3].
  %idx1 = getelementptr inbounds i32* %x, i64 0
  %idx2 = getelementptr inbounds i32* %x, i64 1
  %idx3 = getelementptr inbounds i32* %x, i64 2
  %idx4 = getelementptr inbounds i32* %x, i64 3

  %load1 = load i32* %idx1, align 4
  %load2 = load i32* %idx2, align 4
  %load3 = load i32* %idx3, align 4
  %load4 = load i32* %idx4, align 4

  ; %op4 is the only lane without 'nsw'.
  %op1 = add nsw i32 %load1, 1
  %op2 = add nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = add i32 %load4, 1

  ; Results are written back in place.
  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}

; Positive case for 'nuw': all four scalar adds carry the flag, so the
; vector add is expected to carry it as well.
; CHECK-LABEL: @nuw(
; CHECK: add nuw <4 x i32>
define void @nuw(i32* %x) {
  ; Addresses of the four consecutive i32 elements x[0..3].
  %idx1 = getelementptr inbounds i32* %x, i64 0
  %idx2 = getelementptr inbounds i32* %x, i64 1
  %idx3 = getelementptr inbounds i32* %x, i64 2
  %idx4 = getelementptr inbounds i32* %x, i64 3

  %load1 = load i32* %idx1, align 4
  %load2 = load i32* %idx2, align 4
  %load3 = load i32* %idx3, align 4
  %load4 = load i32* %idx4, align 4

  ; Same operation and same flag on every lane.
  %op1 = add nuw i32 %load1, 1
  %op2 = add nuw i32 %load2, 1
  %op3 = add nuw i32 %load3, 1
  %op4 = add nuw i32 %load4, 1

  ; Results are written back in place.
  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}

; Negative case for 'nuw': two of the four scalar adds lack the flag, so
; the vector add is expected to be emitted without it.
; CHECK-LABEL: @not_nuw(
; CHECK: add <4 x i32>
define void @not_nuw(i32* %x) {
  ; Addresses of the four consecutive i32 elements x[0..3].
  %idx1 = getelementptr inbounds i32* %x, i64 0
  %idx2 = getelementptr inbounds i32* %x, i64 1
  %idx3 = getelementptr inbounds i32* %x, i64 2
  %idx4 = getelementptr inbounds i32* %x, i64 3

  %load1 = load i32* %idx1, align 4
  %load2 = load i32* %idx2, align 4
  %load3 = load i32* %idx3, align 4
  %load4 = load i32* %idx4, align 4

  ; %op2 and %op3 are the lanes without 'nuw'.
  %op1 = add nuw i32 %load1, 1
  %op2 = add i32 %load2, 1
  %op3 = add i32 %load3, 1
  %op4 = add nuw i32 %load4, 1

  ; Results are written back in place.
  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}

; Fast-math intersection: the four scalar fadds carry different flag
; sets, and 'nnan' is the only flag present on all of them. The vector
; fadd is expected to carry exactly 'nnan'.
; CHECK-LABEL: @nnan(
; CHECK: fadd nnan <4 x float>
define void @nnan(float* %x) {
  ; Addresses of the four consecutive float elements x[0..3].
  %idx1 = getelementptr inbounds float* %x, i64 0
  %idx2 = getelementptr inbounds float* %x, i64 1
  %idx3 = getelementptr inbounds float* %x, i64 2
  %idx4 = getelementptr inbounds float* %x, i64 3

  %load1 = load float* %idx1, align 4
  %load2 = load float* %idx2, align 4
  %load3 = load float* %idx3, align 4
  %load4 = load float* %idx4, align 4

  ; Each lane pairs 'nnan' with a different extra flag.
  %op1 = fadd fast nnan float %load1, 1.0
  %op2 = fadd nnan ninf float %load2, 1.0
  %op3 = fadd nsz nnan float %load3, 1.0
  %op4 = fadd arcp nnan float %load4, 1.0

  ; Results are written back in place.
  store float %op1, float* %idx1, align 4
  store float %op2, float* %idx2, align 4
  store float %op3, float* %idx3, align 4
  store float %op4, float* %idx4, align 4

  ret void
}

; Fast-math intersection, empty result: each scalar fadd carries a
; different single flag, so no flag is common to all four. The vector
; fadd is expected to be emitted with no fast-math flags.
; CHECK-LABEL: @not_nnan(
; CHECK: fadd <4 x float>
define void @not_nnan(float* %x) {
  ; Addresses of the four consecutive float elements x[0..3].
  %idx1 = getelementptr inbounds float* %x, i64 0
  %idx2 = getelementptr inbounds float* %x, i64 1
  %idx3 = getelementptr inbounds float* %x, i64 2
  %idx4 = getelementptr inbounds float* %x, i64 3

  %load1 = load float* %idx1, align 4
  %load2 = load float* %idx2, align 4
  %load3 = load float* %idx3, align 4
  %load4 = load float* %idx4, align 4

  ; Four different flags, one per lane; only %op1 has 'nnan'.
  %op1 = fadd nnan float %load1, 1.0
  %op2 = fadd ninf float %load2, 1.0
  %op3 = fadd nsz float %load3, 1.0
  %op4 = fadd arcp float %load4, 1.0

  ; Results are written back in place.
  store float %op1, float* %idx1, align 4
  store float %op2, float* %idx2, align 4
  store float %op3, float* %idx3, align 4
  store float %op4, float* %idx4, align 4

  ret void
}

; All four scalar fadds are written with 'fast' plus assorted redundant
; individual flags, in varying order. The vector fadd is expected to be
; printed with 'fast' alone.
; CHECK-LABEL: @only_fast(
; CHECK: fadd fast <4 x float>
define void @only_fast(float* %x) {
  ; Addresses of the four consecutive float elements x[0..3].
  %idx1 = getelementptr inbounds float* %x, i64 0
  %idx2 = getelementptr inbounds float* %x, i64 1
  %idx3 = getelementptr inbounds float* %x, i64 2
  %idx4 = getelementptr inbounds float* %x, i64 3

  %load1 = load float* %idx1, align 4
  %load2 = load float* %idx2, align 4
  %load3 = load float* %idx3, align 4
  %load4 = load float* %idx4, align 4

  ; 'fast' appears on every lane; the accompanying flags differ.
  %op1 = fadd fast nnan float %load1, 1.0
  %op2 = fadd fast nnan ninf float %load2, 1.0
  %op3 = fadd fast nsz nnan float %load3, 1.0
  %op4 = fadd arcp nnan fast float %load4, 1.0

  ; Results are written back in place.
  store float %op1, float* %idx1, align 4
  store float %op2, float* %idx2, align 4
  store float %op3, float* %idx3, align 4
  store float %op4, float* %idx4, align 4

  ret void
}

; Three scalar fadds are 'fast' and the fourth carries only 'arcp'. The
; vector fadd is expected to carry just 'arcp', i.e. 'fast' on the other
; lanes counts as including 'arcp' but must not survive as 'fast'.
; CHECK-LABEL: @only_arcp(
; CHECK: fadd arcp <4 x float>
define void @only_arcp(float* %x) {
  ; Addresses of the four consecutive float elements x[0..3].
  %idx1 = getelementptr inbounds float* %x, i64 0
  %idx2 = getelementptr inbounds float* %x, i64 1
  %idx3 = getelementptr inbounds float* %x, i64 2
  %idx4 = getelementptr inbounds float* %x, i64 3

  %load1 = load float* %idx1, align 4
  %load2 = load float* %idx2, align 4
  %load3 = load float* %idx3, align 4
  %load4 = load float* %idx4, align 4

  ; %op4 is the only lane that is not 'fast'.
  %op1 = fadd fast float %load1, 1.0
  %op2 = fadd fast float %load2, 1.0
  %op3 = fadd fast float %load3, 1.0
  %op4 = fadd arcp float %load4, 1.0

  ; Results are written back in place.
  store float %op1, float* %idx1, align 4
  store float %op2, float* %idx2, align 4
  store float %op3, float* %idx3, align 4
  store float %op4, float* %idx4, align 4

  ret void
}

; Alternating add/sub lanes where every scalar operation carries 'nsw'.
; The expected output contains a vector add and a vector sub, both of
; which keep 'nsw'.
; CHECK-LABEL: @addsub_all_nsw(
; CHECK: add nsw <4 x i32>
; CHECK: sub nsw <4 x i32>
define void @addsub_all_nsw(i32* %x) {
  ; Addresses of the four consecutive i32 elements x[0..3].
  %idx1 = getelementptr inbounds i32* %x, i64 0
  %idx2 = getelementptr inbounds i32* %x, i64 1
  %idx3 = getelementptr inbounds i32* %x, i64 2
  %idx4 = getelementptr inbounds i32* %x, i64 3

  %load1 = load i32* %idx1, align 4
  %load2 = load i32* %idx2, align 4
  %load3 = load i32* %idx3, align 4
  %load4 = load i32* %idx4, align 4

  ; add on lanes 1 and 3, sub on lanes 2 and 4; 'nsw' everywhere.
  %op1 = add nsw i32 %load1, 1
  %op2 = sub nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = sub nsw i32 %load4, 1

  ; Results are written back in place.
  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}

; Alternating add/sub lanes where both adds carry 'nsw' but only one of
; the two subs does. The expected output keeps 'nsw' on the vector add
; and drops it from the vector sub.
; CHECK-LABEL: @addsub_some_nsw(
; CHECK: add nsw <4 x i32>
; CHECK: sub <4 x i32>
define void @addsub_some_nsw(i32* %x) {
  ; Addresses of the four consecutive i32 elements x[0..3].
  %idx1 = getelementptr inbounds i32* %x, i64 0
  %idx2 = getelementptr inbounds i32* %x, i64 1
  %idx3 = getelementptr inbounds i32* %x, i64 2
  %idx4 = getelementptr inbounds i32* %x, i64 3

  %load1 = load i32* %idx1, align 4
  %load2 = load i32* %idx2, align 4
  %load3 = load i32* %idx3, align 4
  %load4 = load i32* %idx4, align 4

  ; %op4 (a sub) is the only lane without 'nsw'.
  %op1 = add nsw i32 %load1, 1
  %op2 = sub nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = sub i32 %load4, 1

  ; Results are written back in place.
  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}

; Alternating add/sub lanes where one add and one sub lack 'nsw'. The
; expected output has neither the vector add nor the vector sub carrying
; the flag.
; CHECK-LABEL: @addsub_no_nsw(
; CHECK: add <4 x i32>
; CHECK: sub <4 x i32>
define void @addsub_no_nsw(i32* %x) {
  ; Addresses of the four consecutive i32 elements x[0..3].
  %idx1 = getelementptr inbounds i32* %x, i64 0
  %idx2 = getelementptr inbounds i32* %x, i64 1
  %idx3 = getelementptr inbounds i32* %x, i64 2
  %idx4 = getelementptr inbounds i32* %x, i64 3

  %load1 = load i32* %idx1, align 4
  %load2 = load i32* %idx2, align 4
  %load3 = load i32* %idx3, align 4
  %load4 = load i32* %idx4, align 4

  ; %op1 (an add) and %op4 (a sub) are the lanes without 'nsw'.
  %op1 = add i32 %load1, 1
  %op2 = sub nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = sub i32 %load4, 1

  ; Results are written back in place.
  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}