1; RUN: opt < %s -O1 -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
2
3target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
4
5%struct.anon = type { [100 x i32], i32, [100 x i32] }
6%struct.anon.0 = type { [100 x [100 x i32]], i32, [100 x [100 x i32]] }
7
8@Foo = common global %struct.anon zeroinitializer, align 4
9@Bar = common global %struct.anon.0 zeroinitializer, align 4
10
11@PB = external global i32*
12@PA = external global i32*
13
14
15;; === First, the tests that should always vectorize, wither statically or by adding run-time checks ===
16
17
18; /// Different objects, positive induction, constant distance
19; int noAlias01 (int a) {
20;   int i;
21;   for (i=0; i<SIZE; i++)
22;     Foo.A[i] = Foo.B[i] + a;
23;   return Foo.A[a];
24; }
25; CHECK-LABEL: define i32 @noAlias01(
26; CHECK: add nsw <4 x i32>
27; CHECK: ret
28
29define i32 @noAlias01(i32 %a) nounwind {
30entry:
31  %a.addr = alloca i32, align 4
32  %i = alloca i32, align 4
33  store i32 %a, i32* %a.addr, align 4
34  store i32 0, i32* %i, align 4
35  br label %for.cond
36
37for.cond:                                         ; preds = %for.inc, %entry
38  %0 = load i32* %i, align 4
39  %cmp = icmp slt i32 %0, 100
40  br i1 %cmp, label %for.body, label %for.end
41
42for.body:                                         ; preds = %for.cond
43  %1 = load i32* %i, align 4
44  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
45  %2 = load i32* %arrayidx, align 4
46  %3 = load i32* %a.addr, align 4
47  %add = add nsw i32 %2, %3
48  %4 = load i32* %i, align 4
49  %arrayidx1 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
50  store i32 %add, i32* %arrayidx1, align 4
51  br label %for.inc
52
53for.inc:                                          ; preds = %for.body
54  %5 = load i32* %i, align 4
55  %inc = add nsw i32 %5, 1
56  store i32 %inc, i32* %i, align 4
57  br label %for.cond
58
59for.end:                                          ; preds = %for.cond
60  %6 = load i32* %a.addr, align 4
61  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
62  %7 = load i32* %arrayidx2, align 4
63  ret i32 %7
64}
65
66; /// Different objects, positive induction with widening slide
67; int noAlias02 (int a) {
68;   int i;
69;   for (i=0; i<SIZE-10; i++)
70;     Foo.A[i] = Foo.B[i+10] + a;
71;   return Foo.A[a];
72; }
73; CHECK-LABEL: define i32 @noAlias02(
74; CHECK: add nsw <4 x i32>
75; CHECK: ret
76
77define i32 @noAlias02(i32 %a) {
78entry:
79  %a.addr = alloca i32, align 4
80  %i = alloca i32, align 4
81  store i32 %a, i32* %a.addr, align 4
82  store i32 0, i32* %i, align 4
83  br label %for.cond
84
85for.cond:                                         ; preds = %for.inc, %entry
86  %0 = load i32* %i, align 4
87  %cmp = icmp slt i32 %0, 90
88  br i1 %cmp, label %for.body, label %for.end
89
90for.body:                                         ; preds = %for.cond
91  %1 = load i32* %i, align 4
92  %add = add nsw i32 %1, 10
93  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %add
94  %2 = load i32* %arrayidx, align 4
95  %3 = load i32* %a.addr, align 4
96  %add1 = add nsw i32 %2, %3
97  %4 = load i32* %i, align 4
98  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
99  store i32 %add1, i32* %arrayidx2, align 4
100  br label %for.inc
101
102for.inc:                                          ; preds = %for.body
103  %5 = load i32* %i, align 4
104  %inc = add nsw i32 %5, 1
105  store i32 %inc, i32* %i, align 4
106  br label %for.cond
107
108for.end:                                          ; preds = %for.cond
109  %6 = load i32* %a.addr, align 4
110  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
111  %7 = load i32* %arrayidx3, align 4
112  ret i32 %7
113}
114
115; /// Different objects, positive induction with shortening slide
116; int noAlias03 (int a) {
117;   int i;
118;   for (i=0; i<SIZE; i++)
119;     Foo.A[i+10] = Foo.B[i] + a;
120;   return Foo.A[a];
121; }
122; CHECK-LABEL: define i32 @noAlias03(
123; CHECK: add nsw <4 x i32>
124; CHECK: ret
125
126define i32 @noAlias03(i32 %a) {
127entry:
128  %a.addr = alloca i32, align 4
129  %i = alloca i32, align 4
130  store i32 %a, i32* %a.addr, align 4
131  store i32 0, i32* %i, align 4
132  br label %for.cond
133
134for.cond:                                         ; preds = %for.inc, %entry
135  %0 = load i32* %i, align 4
136  %cmp = icmp slt i32 %0, 100
137  br i1 %cmp, label %for.body, label %for.end
138
139for.body:                                         ; preds = %for.cond
140  %1 = load i32* %i, align 4
141  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
142  %2 = load i32* %arrayidx, align 4
143  %3 = load i32* %a.addr, align 4
144  %add = add nsw i32 %2, %3
145  %4 = load i32* %i, align 4
146  %add1 = add nsw i32 %4, 10
147  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add1
148  store i32 %add, i32* %arrayidx2, align 4
149  br label %for.inc
150
151for.inc:                                          ; preds = %for.body
152  %5 = load i32* %i, align 4
153  %inc = add nsw i32 %5, 1
154  store i32 %inc, i32* %i, align 4
155  br label %for.cond
156
157for.end:                                          ; preds = %for.cond
158  %6 = load i32* %a.addr, align 4
159  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
160  %7 = load i32* %arrayidx3, align 4
161  ret i32 %7
162}
163
164; /// Pointer access, positive stride, run-time check added
165; int noAlias04 (int a) {
166;   int i;
167;   for (i=0; i<SIZE; i++)
168;     *(PA+i) = *(PB+i) + a;
169;   return *(PA+a);
170; }
171; CHECK-LABEL: define i32 @noAlias04(
172; CHECK-NOT: add nsw <4 x i32>
173; CHECK: ret
174;
175; TODO: This test vectorizes (with run-time check) on real targets with -O3)
176; Check why it's not being vectorized even when forcing vectorization
177
178define i32 @noAlias04(i32 %a) #0 {
179entry:
180  %a.addr = alloca i32, align 4
181  %i = alloca i32, align 4
182  store i32 %a, i32* %a.addr, align 4
183  store i32 0, i32* %i, align 4
184  br label %for.cond
185
186for.cond:                                         ; preds = %for.inc, %entry
187  %0 = load i32* %i, align 4
188  %cmp = icmp slt i32 %0, 100
189  br i1 %cmp, label %for.body, label %for.end
190
191for.body:                                         ; preds = %for.cond
192  %1 = load i32** @PB, align 4
193  %2 = load i32* %i, align 4
194  %add.ptr = getelementptr inbounds i32* %1, i32 %2
195  %3 = load i32* %add.ptr, align 4
196  %4 = load i32* %a.addr, align 4
197  %add = add nsw i32 %3, %4
198  %5 = load i32** @PA, align 4
199  %6 = load i32* %i, align 4
200  %add.ptr1 = getelementptr inbounds i32* %5, i32 %6
201  store i32 %add, i32* %add.ptr1, align 4
202  br label %for.inc
203
204for.inc:                                          ; preds = %for.body
205  %7 = load i32* %i, align 4
206  %inc = add nsw i32 %7, 1
207  store i32 %inc, i32* %i, align 4
208  br label %for.cond
209
210for.end:                                          ; preds = %for.cond
211  %8 = load i32** @PA, align 4
212  %9 = load i32* %a.addr, align 4
213  %add.ptr2 = getelementptr inbounds i32* %8, i32 %9
214  %10 = load i32* %add.ptr2, align 4
215  ret i32 %10
216}
217
218; /// Different objects, positive induction, multi-array
219; int noAlias05 (int a) {
220;   int i, N=10;
221;   for (i=0; i<SIZE; i++)
222;     Bar.A[N][i] = Bar.B[N][i] + a;
223;   return Bar.A[N][a];
224; }
225; CHECK-LABEL: define i32 @noAlias05(
226; CHECK: add nsw <4 x i32>
227; CHECK: ret
228
229define i32 @noAlias05(i32 %a) #0 {
230entry:
231  %a.addr = alloca i32, align 4
232  %i = alloca i32, align 4
233  %N = alloca i32, align 4
234  store i32 %a, i32* %a.addr, align 4
235  store i32 10, i32* %N, align 4
236  store i32 0, i32* %i, align 4
237  br label %for.cond
238
239for.cond:                                         ; preds = %for.inc, %entry
240  %0 = load i32* %i, align 4
241  %cmp = icmp slt i32 %0, 100
242  br i1 %cmp, label %for.body, label %for.end
243
244for.body:                                         ; preds = %for.cond
245  %1 = load i32* %i, align 4
246  %2 = load i32* %N, align 4
247  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2
248  %arrayidx1 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %1
249  %3 = load i32* %arrayidx1, align 4
250  %4 = load i32* %a.addr, align 4
251  %add = add nsw i32 %3, %4
252  %5 = load i32* %i, align 4
253  %6 = load i32* %N, align 4
254  %arrayidx2 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
255  %arrayidx3 = getelementptr inbounds [100 x i32]* %arrayidx2, i32 0, i32 %5
256  store i32 %add, i32* %arrayidx3, align 4
257  br label %for.inc
258
259for.inc:                                          ; preds = %for.body
260  %7 = load i32* %i, align 4
261  %inc = add nsw i32 %7, 1
262  store i32 %inc, i32* %i, align 4
263  br label %for.cond
264
265for.end:                                          ; preds = %for.cond
266  %8 = load i32* %a.addr, align 4
267  %9 = load i32* %N, align 4
268  %arrayidx4 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
269  %arrayidx5 = getelementptr inbounds [100 x i32]* %arrayidx4, i32 0, i32 %8
270  %10 = load i32* %arrayidx5, align 4
271  ret i32 %10
272}
273
274; /// Same objects, positive induction, multi-array, different sub-elements
275; int noAlias06 (int a) {
276;   int i, N=10;
277;   for (i=0; i<SIZE; i++)
278;     Bar.A[N][i] = Bar.A[N+1][i] + a;
279;   return Bar.A[N][a];
280; }
281; CHECK-LABEL: define i32 @noAlias06(
282; CHECK: add nsw <4 x i32>
283; CHECK: ret
284
285define i32 @noAlias06(i32 %a) #0 {
286entry:
287  %a.addr = alloca i32, align 4
288  %i = alloca i32, align 4
289  %N = alloca i32, align 4
290  store i32 %a, i32* %a.addr, align 4
291  store i32 10, i32* %N, align 4
292  store i32 0, i32* %i, align 4
293  br label %for.cond
294
295for.cond:                                         ; preds = %for.inc, %entry
296  %0 = load i32* %i, align 4
297  %cmp = icmp slt i32 %0, 100
298  br i1 %cmp, label %for.body, label %for.end
299
300for.body:                                         ; preds = %for.cond
301  %1 = load i32* %i, align 4
302  %2 = load i32* %N, align 4
303  %add = add nsw i32 %2, 1
304  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add
305  %arrayidx1 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %1
306  %3 = load i32* %arrayidx1, align 4
307  %4 = load i32* %a.addr, align 4
308  %add2 = add nsw i32 %3, %4
309  %5 = load i32* %i, align 4
310  %6 = load i32* %N, align 4
311  %arrayidx3 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
312  %arrayidx4 = getelementptr inbounds [100 x i32]* %arrayidx3, i32 0, i32 %5
313  store i32 %add2, i32* %arrayidx4, align 4
314  br label %for.inc
315
316for.inc:                                          ; preds = %for.body
317  %7 = load i32* %i, align 4
318  %inc = add nsw i32 %7, 1
319  store i32 %inc, i32* %i, align 4
320  br label %for.cond
321
322for.end:                                          ; preds = %for.cond
323  %8 = load i32* %a.addr, align 4
324  %9 = load i32* %N, align 4
325  %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
326  %arrayidx6 = getelementptr inbounds [100 x i32]* %arrayidx5, i32 0, i32 %8
327  %10 = load i32* %arrayidx6, align 4
328  ret i32 %10
329}
330
331; /// Different objects, negative induction, constant distance
332; int noAlias07 (int a) {
333;   int i;
334;   for (i=0; i<SIZE; i++)
335;     Foo.A[SIZE-i-1] = Foo.B[SIZE-i-1] + a;
336;   return Foo.A[a];
337; }
338; CHECK-LABEL: define i32 @noAlias07(
339; CHECK: store <4 x i32>
340; CHECK: ret
341define i32 @noAlias07(i32 %a) #0 {
342entry:
343  %a.addr = alloca i32, align 4
344  %i = alloca i32, align 4
345  store i32 %a, i32* %a.addr, align 4
346  store i32 0, i32* %i, align 4
347  br label %for.cond
348
349for.cond:                                         ; preds = %for.inc, %entry
350  %0 = load i32* %i, align 4
351  %cmp = icmp slt i32 %0, 100
352  br i1 %cmp, label %for.body, label %for.end
353
354for.body:                                         ; preds = %for.cond
355  %1 = load i32* %i, align 4
356  %sub = sub nsw i32 100, %1
357  %sub1 = sub nsw i32 %sub, 1
358  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
359  %2 = load i32* %arrayidx, align 4
360  %3 = load i32* %a.addr, align 4
361  %add = add nsw i32 %2, %3
362  %4 = load i32* %i, align 4
363  %sub2 = sub nsw i32 100, %4
364  %sub3 = sub nsw i32 %sub2, 1
365  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
366  store i32 %add, i32* %arrayidx4, align 4
367  br label %for.inc
368
369for.inc:                                          ; preds = %for.body
370  %5 = load i32* %i, align 4
371  %inc = add nsw i32 %5, 1
372  store i32 %inc, i32* %i, align 4
373  br label %for.cond
374
375for.end:                                          ; preds = %for.cond
376  %6 = load i32* %a.addr, align 4
377  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
378  %7 = load i32* %arrayidx5, align 4
379  ret i32 %7
380}
381
382; /// Different objects, negative induction, shortening slide
383; int noAlias08 (int a) {
384;   int i;
385;   for (i=0; i<SIZE-10; i++)
386;     Foo.A[SIZE-i-1] = Foo.B[SIZE-i-10] + a;
387;   return Foo.A[a];
388; }
389; CHECK-LABEL: define i32 @noAlias08(
390; CHECK: sub <4 x i32>
391; CHECK: ret
392
393define i32 @noAlias08(i32 %a) #0 {
394entry:
395  %a.addr = alloca i32, align 4
396  %i = alloca i32, align 4
397  store i32 %a, i32* %a.addr, align 4
398  store i32 0, i32* %i, align 4
399  br label %for.cond
400
401for.cond:                                         ; preds = %for.inc, %entry
402  %0 = load i32* %i, align 4
403  %cmp = icmp slt i32 %0, 90
404  br i1 %cmp, label %for.body, label %for.end
405
406for.body:                                         ; preds = %for.cond
407  %1 = load i32* %i, align 4
408  %sub = sub nsw i32 100, %1
409  %sub1 = sub nsw i32 %sub, 10
410  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
411  %2 = load i32* %arrayidx, align 4
412  %3 = load i32* %a.addr, align 4
413  %add = add nsw i32 %2, %3
414  %4 = load i32* %i, align 4
415  %sub2 = sub nsw i32 100, %4
416  %sub3 = sub nsw i32 %sub2, 1
417  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
418  store i32 %add, i32* %arrayidx4, align 4
419  br label %for.inc
420
421for.inc:                                          ; preds = %for.body
422  %5 = load i32* %i, align 4
423  %inc = add nsw i32 %5, 1
424  store i32 %inc, i32* %i, align 4
425  br label %for.cond
426
427for.end:                                          ; preds = %for.cond
428  %6 = load i32* %a.addr, align 4
429  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
430  %7 = load i32* %arrayidx5, align 4
431  ret i32 %7
432}
433
434; /// Different objects, negative induction, widening slide
435; int noAlias09 (int a) {
436;   int i;
437;   for (i=0; i<SIZE; i++)
438;     Foo.A[SIZE-i-10] = Foo.B[SIZE-i-1] + a;
439;   return Foo.A[a];
440; }
441; CHECK-LABEL: define i32 @noAlias09(
442; CHECK: sub <4 x i32>
443; CHECK: ret
444
445define i32 @noAlias09(i32 %a) #0 {
446entry:
447  %a.addr = alloca i32, align 4
448  %i = alloca i32, align 4
449  store i32 %a, i32* %a.addr, align 4
450  store i32 0, i32* %i, align 4
451  br label %for.cond
452
453for.cond:                                         ; preds = %for.inc, %entry
454  %0 = load i32* %i, align 4
455  %cmp = icmp slt i32 %0, 100
456  br i1 %cmp, label %for.body, label %for.end
457
458for.body:                                         ; preds = %for.cond
459  %1 = load i32* %i, align 4
460  %sub = sub nsw i32 100, %1
461  %sub1 = sub nsw i32 %sub, 1
462  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
463  %2 = load i32* %arrayidx, align 4
464  %3 = load i32* %a.addr, align 4
465  %add = add nsw i32 %2, %3
466  %4 = load i32* %i, align 4
467  %sub2 = sub nsw i32 100, %4
468  %sub3 = sub nsw i32 %sub2, 10
469  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
470  store i32 %add, i32* %arrayidx4, align 4
471  br label %for.inc
472
473for.inc:                                          ; preds = %for.body
474  %5 = load i32* %i, align 4
475  %inc = add nsw i32 %5, 1
476  store i32 %inc, i32* %i, align 4
477  br label %for.cond
478
479for.end:                                          ; preds = %for.cond
480  %6 = load i32* %a.addr, align 4
481  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
482  %7 = load i32* %arrayidx5, align 4
483  ret i32 %7
484}
485
486; /// Pointer access, negative stride, run-time check added
487; int noAlias10 (int a) {
488;   int i;
489;   for (i=0; i<SIZE; i++)
490;     *(PA+SIZE-i-1) = *(PB+SIZE-i-1) + a;
491;   return *(PA+a);
492; }
493; CHECK-LABEL: define i32 @noAlias10(
494; CHECK-NOT: sub {{.*}} <4 x i32>
495; CHECK: ret
496;
497; TODO: This test vectorizes (with run-time check) on real targets with -O3)
498; Check why it's not being vectorized even when forcing vectorization
499
500define i32 @noAlias10(i32 %a) #0 {
501entry:
502  %a.addr = alloca i32, align 4
503  %i = alloca i32, align 4
504  store i32 %a, i32* %a.addr, align 4
505  store i32 0, i32* %i, align 4
506  br label %for.cond
507
508for.cond:                                         ; preds = %for.inc, %entry
509  %0 = load i32* %i, align 4
510  %cmp = icmp slt i32 %0, 100
511  br i1 %cmp, label %for.body, label %for.end
512
513for.body:                                         ; preds = %for.cond
514  %1 = load i32** @PB, align 4
515  %add.ptr = getelementptr inbounds i32* %1, i32 100
516  %2 = load i32* %i, align 4
517  %idx.neg = sub i32 0, %2
518  %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg
519  %add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1
520  %3 = load i32* %add.ptr2, align 4
521  %4 = load i32* %a.addr, align 4
522  %add = add nsw i32 %3, %4
523  %5 = load i32** @PA, align 4
524  %add.ptr3 = getelementptr inbounds i32* %5, i32 100
525  %6 = load i32* %i, align 4
526  %idx.neg4 = sub i32 0, %6
527  %add.ptr5 = getelementptr inbounds i32* %add.ptr3, i32 %idx.neg4
528  %add.ptr6 = getelementptr inbounds i32* %add.ptr5, i32 -1
529  store i32 %add, i32* %add.ptr6, align 4
530  br label %for.inc
531
532for.inc:                                          ; preds = %for.body
533  %7 = load i32* %i, align 4
534  %inc = add nsw i32 %7, 1
535  store i32 %inc, i32* %i, align 4
536  br label %for.cond
537
538for.end:                                          ; preds = %for.cond
539  %8 = load i32** @PA, align 4
540  %9 = load i32* %a.addr, align 4
541  %add.ptr7 = getelementptr inbounds i32* %8, i32 %9
542  %10 = load i32* %add.ptr7, align 4
543  ret i32 %10
544}
545
546; /// Different objects, negative induction, multi-array
547; int noAlias11 (int a) {
548;   int i, N=10;
549;   for (i=0; i<SIZE; i++)
550;     Bar.A[N][SIZE-i-1] = Bar.B[N][SIZE-i-1] + a;
551;   return Bar.A[N][a];
552; }
553; CHECK-LABEL: define i32 @noAlias11(
554; CHECK: store <4 x i32>
555; CHECK: ret
556
557define i32 @noAlias11(i32 %a) #0 {
558entry:
559  %a.addr = alloca i32, align 4
560  %i = alloca i32, align 4
561  %N = alloca i32, align 4
562  store i32 %a, i32* %a.addr, align 4
563  store i32 10, i32* %N, align 4
564  store i32 0, i32* %i, align 4
565  br label %for.cond
566
567for.cond:                                         ; preds = %for.inc, %entry
568  %0 = load i32* %i, align 4
569  %cmp = icmp slt i32 %0, 100
570  br i1 %cmp, label %for.body, label %for.end
571
572for.body:                                         ; preds = %for.cond
573  %1 = load i32* %i, align 4
574  %sub = sub nsw i32 100, %1
575  %sub1 = sub nsw i32 %sub, 1
576  %2 = load i32* %N, align 4
577  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2
578  %arrayidx2 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %sub1
579  %3 = load i32* %arrayidx2, align 4
580  %4 = load i32* %a.addr, align 4
581  %add = add nsw i32 %3, %4
582  %5 = load i32* %i, align 4
583  %sub3 = sub nsw i32 100, %5
584  %sub4 = sub nsw i32 %sub3, 1
585  %6 = load i32* %N, align 4
586  %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
587  %arrayidx6 = getelementptr inbounds [100 x i32]* %arrayidx5, i32 0, i32 %sub4
588  store i32 %add, i32* %arrayidx6, align 4
589  br label %for.inc
590
591for.inc:                                          ; preds = %for.body
592  %7 = load i32* %i, align 4
593  %inc = add nsw i32 %7, 1
594  store i32 %inc, i32* %i, align 4
595  br label %for.cond
596
597for.end:                                          ; preds = %for.cond
598  %8 = load i32* %a.addr, align 4
599  %9 = load i32* %N, align 4
600  %arrayidx7 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
601  %arrayidx8 = getelementptr inbounds [100 x i32]* %arrayidx7, i32 0, i32 %8
602  %10 = load i32* %arrayidx8, align 4
603  ret i32 %10
604}
605
606; /// Same objects, negative induction, multi-array, different sub-elements
607; int noAlias12 (int a) {
608;   int i, N=10;
609;   for (i=0; i<SIZE; i++)
610;     Bar.A[N][SIZE-i-1] = Bar.A[N+1][SIZE-i-1] + a;
611;   return Bar.A[N][a];
612; }
613; CHECK-LABEL: define i32 @noAlias12(
614; CHECK: store <4 x i32>
615; CHECK: ret
616
617define i32 @noAlias12(i32 %a) #0 {
618entry:
619  %a.addr = alloca i32, align 4
620  %i = alloca i32, align 4
621  %N = alloca i32, align 4
622  store i32 %a, i32* %a.addr, align 4
623  store i32 10, i32* %N, align 4
624  store i32 0, i32* %i, align 4
625  br label %for.cond
626
627for.cond:                                         ; preds = %for.inc, %entry
628  %0 = load i32* %i, align 4
629  %cmp = icmp slt i32 %0, 100
630  br i1 %cmp, label %for.body, label %for.end
631
632for.body:                                         ; preds = %for.cond
633  %1 = load i32* %i, align 4
634  %sub = sub nsw i32 100, %1
635  %sub1 = sub nsw i32 %sub, 1
636  %2 = load i32* %N, align 4
637  %add = add nsw i32 %2, 1
638  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add
639  %arrayidx2 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %sub1
640  %3 = load i32* %arrayidx2, align 4
641  %4 = load i32* %a.addr, align 4
642  %add3 = add nsw i32 %3, %4
643  %5 = load i32* %i, align 4
644  %sub4 = sub nsw i32 100, %5
645  %sub5 = sub nsw i32 %sub4, 1
646  %6 = load i32* %N, align 4
647  %arrayidx6 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
648  %arrayidx7 = getelementptr inbounds [100 x i32]* %arrayidx6, i32 0, i32 %sub5
649  store i32 %add3, i32* %arrayidx7, align 4
650  br label %for.inc
651
652for.inc:                                          ; preds = %for.body
653  %7 = load i32* %i, align 4
654  %inc = add nsw i32 %7, 1
655  store i32 %inc, i32* %i, align 4
656  br label %for.cond
657
658for.end:                                          ; preds = %for.cond
659  %8 = load i32* %a.addr, align 4
660  %9 = load i32* %N, align 4
661  %arrayidx8 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
662  %arrayidx9 = getelementptr inbounds [100 x i32]* %arrayidx8, i32 0, i32 %8
663  %10 = load i32* %arrayidx9, align 4
664  ret i32 %10
665}
666
667; /// Same objects, positive induction, constant distance, just enough for vector size
668; int noAlias13 (int a) {
669;   int i;
670;   for (i=0; i<SIZE; i++)
671;     Foo.A[i] = Foo.A[i+4] + a;
672;   return Foo.A[a];
673; }
674; CHECK-LABEL: define i32 @noAlias13(
675; CHECK: add nsw <4 x i32>
676; CHECK: ret
677
678define i32 @noAlias13(i32 %a) #0 {
679entry:
680  %a.addr = alloca i32, align 4
681  %i = alloca i32, align 4
682  store i32 %a, i32* %a.addr, align 4
683  store i32 0, i32* %i, align 4
684  br label %for.cond
685
686for.cond:                                         ; preds = %for.inc, %entry
687  %0 = load i32* %i, align 4
688  %cmp = icmp slt i32 %0, 100
689  br i1 %cmp, label %for.body, label %for.end
690
691for.body:                                         ; preds = %for.cond
692  %1 = load i32* %i, align 4
693  %add = add nsw i32 %1, 4
694  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add
695  %2 = load i32* %arrayidx, align 4
696  %3 = load i32* %a.addr, align 4
697  %add1 = add nsw i32 %2, %3
698  %4 = load i32* %i, align 4
699  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
700  store i32 %add1, i32* %arrayidx2, align 4
701  br label %for.inc
702
703for.inc:                                          ; preds = %for.body
704  %5 = load i32* %i, align 4
705  %inc = add nsw i32 %5, 1
706  store i32 %inc, i32* %i, align 4
707  br label %for.cond
708
709for.end:                                          ; preds = %for.cond
710  %6 = load i32* %a.addr, align 4
711  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
712  %7 = load i32* %arrayidx3, align 4
713  ret i32 %7
714}
715
716; /// Same objects, negative induction, constant distance, just enough for vector size
717; int noAlias14 (int a) {
718;   int i;
719;   for (i=0; i<SIZE; i++)
720;     Foo.A[SIZE-i-1] = Foo.A[SIZE-i-5] + a;
721;   return Foo.A[a];
722; }
723; CHECK-LABEL: define i32 @noAlias14(
724; CHECK: sub <4 x i32>
725; CHECK: ret
726
727define i32 @noAlias14(i32 %a) #0 {
728entry:
729  %a.addr = alloca i32, align 4
730  %i = alloca i32, align 4
731  store i32 %a, i32* %a.addr, align 4
732  store i32 0, i32* %i, align 4
733  br label %for.cond
734
735for.cond:                                         ; preds = %for.inc, %entry
736  %0 = load i32* %i, align 4
737  %cmp = icmp slt i32 %0, 100
738  br i1 %cmp, label %for.body, label %for.end
739
740for.body:                                         ; preds = %for.cond
741  %1 = load i32* %i, align 4
742  %sub = sub nsw i32 100, %1
743  %sub1 = sub nsw i32 %sub, 5
744  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
745  %2 = load i32* %arrayidx, align 4
746  %3 = load i32* %a.addr, align 4
747  %add = add nsw i32 %2, %3
748  %4 = load i32* %i, align 4
749  %sub2 = sub nsw i32 100, %4
750  %sub3 = sub nsw i32 %sub2, 1
751  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
752  store i32 %add, i32* %arrayidx4, align 4
753  br label %for.inc
754
755for.inc:                                          ; preds = %for.body
756  %5 = load i32* %i, align 4
757  %inc = add nsw i32 %5, 1
758  store i32 %inc, i32* %i, align 4
759  br label %for.cond
760
761for.end:                                          ; preds = %for.cond
762  %6 = load i32* %a.addr, align 4
763  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
764  %7 = load i32* %arrayidx5, align 4
765  ret i32 %7
766}
767
768
769;; === Now, the tests that we could vectorize with induction changes or run-time checks ===
770
771
772; /// Different objects, swapped induction, alias at the end
773; int mayAlias01 (int a) {
774;   int i;
775;   for (i=0; i<SIZE; i++)
776;     Foo.A[i] = Foo.B[SIZE-i-1] + a;
777;   return Foo.A[a];
778; }
779; CHECK-LABEL: define i32 @mayAlias01(
780; CHECK-NOT: add nsw <4 x i32>
781; CHECK: ret
782
783define i32 @mayAlias01(i32 %a) nounwind {
784entry:
785  %a.addr = alloca i32, align 4
786  %i = alloca i32, align 4
787  store i32 %a, i32* %a.addr, align 4
788  store i32 0, i32* %i, align 4
789  br label %for.cond
790
791for.cond:                                         ; preds = %for.inc, %entry
792  %0 = load i32* %i, align 4
793  %cmp = icmp slt i32 %0, 100
794  br i1 %cmp, label %for.body, label %for.end
795
796for.body:                                         ; preds = %for.cond
797  %1 = load i32* %i, align 4
798  %sub = sub nsw i32 100, %1
799  %sub1 = sub nsw i32 %sub, 1
800  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
801  %2 = load i32* %arrayidx, align 4
802  %3 = load i32* %a.addr, align 4
803  %add = add nsw i32 %2, %3
804  %4 = load i32* %i, align 4
805  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
806  store i32 %add, i32* %arrayidx2, align 4
807  br label %for.inc
808
809for.inc:                                          ; preds = %for.body
810  %5 = load i32* %i, align 4
811  %inc = add nsw i32 %5, 1
812  store i32 %inc, i32* %i, align 4
813  br label %for.cond
814
815for.end:                                          ; preds = %for.cond
816  %6 = load i32* %a.addr, align 4
817  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
818  %7 = load i32* %arrayidx3, align 4
819  ret i32 %7
820}
821
822; /// Different objects, swapped induction, alias at the beginning
823; int mayAlias02 (int a) {
824;   int i;
825;   for (i=0; i<SIZE; i++)
826;     Foo.A[SIZE-i-1] = Foo.B[i] + a;
827;   return Foo.A[a];
828; }
829; CHECK-LABEL: define i32 @mayAlias02(
830; CHECK-NOT: add nsw <4 x i32>
831; CHECK: ret
832
833define i32 @mayAlias02(i32 %a) nounwind {
834entry:
835  %a.addr = alloca i32, align 4
836  %i = alloca i32, align 4
837  store i32 %a, i32* %a.addr, align 4
838  store i32 0, i32* %i, align 4
839  br label %for.cond
840
841for.cond:                                         ; preds = %for.inc, %entry
842  %0 = load i32* %i, align 4
843  %cmp = icmp slt i32 %0, 100
844  br i1 %cmp, label %for.body, label %for.end
845
846for.body:                                         ; preds = %for.cond
847  %1 = load i32* %i, align 4
848  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
849  %2 = load i32* %arrayidx, align 4
850  %3 = load i32* %a.addr, align 4
851  %add = add nsw i32 %2, %3
852  %4 = load i32* %i, align 4
853  %sub = sub nsw i32 100, %4
854  %sub1 = sub nsw i32 %sub, 1
855  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
856  store i32 %add, i32* %arrayidx2, align 4
857  br label %for.inc
858
859for.inc:                                          ; preds = %for.body
860  %5 = load i32* %i, align 4
861  %inc = add nsw i32 %5, 1
862  store i32 %inc, i32* %i, align 4
863  br label %for.cond
864
865for.end:                                          ; preds = %for.cond
866  %6 = load i32* %a.addr, align 4
867  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
868  %7 = load i32* %arrayidx3, align 4
869  ret i32 %7
870}
871
872; /// Pointer access, run-time check added
873; int mayAlias03 (int a) {
874;   int i;
875;   for (i=0; i<SIZE; i++)
876;     *(PA+i) = *(PB+SIZE-i-1) + a;
877;   return *(PA+a);
878; }
879; CHECK-LABEL: define i32 @mayAlias03(
880; CHECK-NOT: add nsw <4 x i32>
881; CHECK: ret
882
883define i32 @mayAlias03(i32 %a) nounwind {
884entry:
885  %a.addr = alloca i32, align 4
886  %i = alloca i32, align 4
887  store i32 %a, i32* %a.addr, align 4
888  store i32 0, i32* %i, align 4
889  br label %for.cond
890
891for.cond:                                         ; preds = %for.inc, %entry
892  %0 = load i32* %i, align 4
893  %cmp = icmp slt i32 %0, 100
894  br i1 %cmp, label %for.body, label %for.end
895
896for.body:                                         ; preds = %for.cond
897  %1 = load i32** @PB, align 4
898  %add.ptr = getelementptr inbounds i32* %1, i32 100
899  %2 = load i32* %i, align 4
900  %idx.neg = sub i32 0, %2
901  %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg
902  %add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1
903  %3 = load i32* %add.ptr2, align 4
904  %4 = load i32* %a.addr, align 4
905  %add = add nsw i32 %3, %4
906  %5 = load i32** @PA, align 4
907  %6 = load i32* %i, align 4
908  %add.ptr3 = getelementptr inbounds i32* %5, i32 %6
909  store i32 %add, i32* %add.ptr3, align 4
910  br label %for.inc
911
912for.inc:                                          ; preds = %for.body
913  %7 = load i32* %i, align 4
914  %inc = add nsw i32 %7, 1
915  store i32 %inc, i32* %i, align 4
916  br label %for.cond
917
918for.end:                                          ; preds = %for.cond
919  %8 = load i32** @PA, align 4
920  %9 = load i32* %a.addr, align 4
921  %add.ptr4 = getelementptr inbounds i32* %8, i32 %9
922  %10 = load i32* %add.ptr4, align 4
923  ret i32 %10
924}
925
926
927;; === Finally, the tests that should only vectorize with care (or if we ignore undefined behaviour at all) ===
928
929
930; int mustAlias01 (int a) {
931;   int i;
932;   for (i=0; i<SIZE; i++)
933;     Foo.A[i+10] = Foo.B[SIZE-i-1] + a;
934;   return Foo.A[a];
935; }
936; CHECK-LABEL: define i32 @mustAlias01(
937; CHECK-NOT: add nsw <4 x i32>
938; CHECK: ret
939
940define i32 @mustAlias01(i32 %a) nounwind {
941entry:
942  %a.addr = alloca i32, align 4
943  %i = alloca i32, align 4
944  store i32 %a, i32* %a.addr, align 4
945  store i32 0, i32* %i, align 4
946  br label %for.cond
947
948for.cond:                                         ; preds = %for.inc, %entry
949  %0 = load i32* %i, align 4
950  %cmp = icmp slt i32 %0, 100
951  br i1 %cmp, label %for.body, label %for.end
952
953for.body:                                         ; preds = %for.cond
954  %1 = load i32* %i, align 4
955  %sub = sub nsw i32 100, %1
956  %sub1 = sub nsw i32 %sub, 1
957  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
958  %2 = load i32* %arrayidx, align 4
959  %3 = load i32* %a.addr, align 4
960  %add = add nsw i32 %2, %3
961  %4 = load i32* %i, align 4
962  %add2 = add nsw i32 %4, 10
963  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
964  store i32 %add, i32* %arrayidx3, align 4
965  br label %for.inc
966
967for.inc:                                          ; preds = %for.body
968  %5 = load i32* %i, align 4
969  %inc = add nsw i32 %5, 1
970  store i32 %inc, i32* %i, align 4
971  br label %for.cond
972
973for.end:                                          ; preds = %for.cond
974  %6 = load i32* %a.addr, align 4
975  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
976  %7 = load i32* %arrayidx4, align 4
977  ret i32 %7
978}
979
980; int mustAlias02 (int a) {
981;   int i;
982;   for (i=0; i<SIZE; i++)
983;     Foo.A[i] = Foo.B[SIZE-i-10] + a;
984;   return Foo.A[a];
985; }
986; CHECK-LABEL: define i32 @mustAlias02(
987; CHECK-NOT: add nsw <4 x i32>
988; CHECK: ret
989
990define i32 @mustAlias02(i32 %a) nounwind {
991entry:
992  %a.addr = alloca i32, align 4
993  %i = alloca i32, align 4
994  store i32 %a, i32* %a.addr, align 4
995  store i32 0, i32* %i, align 4
996  br label %for.cond
997
998for.cond:                                         ; preds = %for.inc, %entry
999  %0 = load i32* %i, align 4
1000  %cmp = icmp slt i32 %0, 100
1001  br i1 %cmp, label %for.body, label %for.end
1002
1003for.body:                                         ; preds = %for.cond
1004  %1 = load i32* %i, align 4
1005  %sub = sub nsw i32 100, %1
1006  %sub1 = sub nsw i32 %sub, 10
1007  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
1008  %2 = load i32* %arrayidx, align 4
1009  %3 = load i32* %a.addr, align 4
1010  %add = add nsw i32 %2, %3
1011  %4 = load i32* %i, align 4
1012  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
1013  store i32 %add, i32* %arrayidx2, align 4
1014  br label %for.inc
1015
1016for.inc:                                          ; preds = %for.body
1017  %5 = load i32* %i, align 4
1018  %inc = add nsw i32 %5, 1
1019  store i32 %inc, i32* %i, align 4
1020  br label %for.cond
1021
1022for.end:                                          ; preds = %for.cond
1023  %6 = load i32* %a.addr, align 4
1024  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
1025  %7 = load i32* %arrayidx3, align 4
1026  ret i32 %7
1027}
1028
1029; int mustAlias03 (int a) {
1030;   int i;
1031;   for (i=0; i<SIZE; i++)
1032;     Foo.A[i+10] = Foo.B[SIZE-i-10] + a;
1033;   return Foo.A[a];
1034; }
1035; CHECK-LABEL: define i32 @mustAlias03(
1036; CHECK-NOT: add nsw <4 x i32>
1037; CHECK: ret
1038
1039define i32 @mustAlias03(i32 %a) nounwind {
1040entry:
1041  %a.addr = alloca i32, align 4
1042  %i = alloca i32, align 4
1043  store i32 %a, i32* %a.addr, align 4
1044  store i32 0, i32* %i, align 4
1045  br label %for.cond
1046
1047for.cond:                                         ; preds = %for.inc, %entry
1048  %0 = load i32* %i, align 4
1049  %cmp = icmp slt i32 %0, 100
1050  br i1 %cmp, label %for.body, label %for.end
1051
1052for.body:                                         ; preds = %for.cond
1053  %1 = load i32* %i, align 4
1054  %sub = sub nsw i32 100, %1
1055  %sub1 = sub nsw i32 %sub, 10
1056  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
1057  %2 = load i32* %arrayidx, align 4
1058  %3 = load i32* %a.addr, align 4
1059  %add = add nsw i32 %2, %3
1060  %4 = load i32* %i, align 4
1061  %add2 = add nsw i32 %4, 10
1062  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
1063  store i32 %add, i32* %arrayidx3, align 4
1064  br label %for.inc
1065
1066for.inc:                                          ; preds = %for.body
1067  %5 = load i32* %i, align 4
1068  %inc = add nsw i32 %5, 1
1069  store i32 %inc, i32* %i, align 4
1070  br label %for.cond
1071
1072for.end:                                          ; preds = %for.cond
1073  %6 = load i32* %a.addr, align 4
1074  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
1075  %7 = load i32* %arrayidx4, align 4
1076  ret i32 %7
1077}
1078