; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -dse -S | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

declare i8* @calloc(i64, i64)
declare void @memset_pattern16(i8*, i8*, i64)

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
declare void @llvm.init.trampoline(i8*, i8*, i8*)

; **** Noop load->store tests **************************************************

; We CAN optimize noop stores of volatile loads.
define void @test_load_volatile(i32* %Q) {
; CHECK-LABEL: @test_load_volatile(
; CHECK-NEXT:    [[A:%.*]] = load volatile i32, i32* [[Q:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %a = load volatile i32, i32* %Q
  store i32 %a, i32* %Q
  ret void
}

; We can NOT optimize volatile stores.
define void @test_store_volatile(i32* %Q) {
; CHECK-LABEL: @test_store_volatile(
; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[Q:%.*]], align 4
; CHECK-NEXT:    store volatile i32 [[A]], i32* [[Q]], align 4
; CHECK-NEXT:    ret void
;
  %a = load i32, i32* %Q
  store volatile i32 %a, i32* %Q
  ret void
}

; PR2599 - load -> store to same address.
define void @test12({ i32, i32 }* %x) nounwind  {
; CHECK-LABEL: @test12(
; CHECK-NEXT:    [[TEMP7:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[X:%.*]], i32 0, i32 1
; CHECK-NEXT:    [[TEMP8:%.*]] = load i32, i32* [[TEMP7]], align 4
; CHECK-NEXT:    [[TEMP17:%.*]] = sub i32 0, [[TEMP8]]
; CHECK-NEXT:    store i32 [[TEMP17]], i32* [[TEMP7]], align 4
; CHECK-NEXT:    ret void
;
  %temp4 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 0
  %temp5 = load i32, i32* %temp4, align 4
  %temp7 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1
  %temp8 = load i32, i32* %temp7, align 4
  %temp17 = sub i32 0, %temp8
  store i32 %temp5, i32* %temp4, align 4
  store i32 %temp17, i32* %temp7, align 4
  ret void
}

; Remove redundant store if loaded value is in another block.
define i32 @test26(i1 %c, i32* %p) {
; CHECK-LABEL: @test26(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    br label [[BB3:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    br label [[BB3]]
; CHECK:       bb3:
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4
  br i1 %c, label %bb1, label %bb2
bb1:
  br label %bb3
bb2:
  store i32 %v, i32* %p, align 4
  br label %bb3
bb3:
  ret i32 0
}

; Remove redundant store if loaded value is in another block.
define i32 @test27(i1 %c, i32* %p) {
; CHECK-LABEL: @test27(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    br label [[BB3:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    br label [[BB3]]
; CHECK:       bb3:
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4
  br i1 %c, label %bb1, label %bb2
bb1:
  br label %bb3
bb2:
  br label %bb3
bb3:
  store i32 %v, i32* %p, align 4
  ret i32 0
}

; Remove redundant store if loaded value is in another block inside a loop.
define i32 @test31(i1 %c, i32* %p, i32 %i) {
; CHECK-LABEL: @test31(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[BB1:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1]], label [[BB2:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4
  br label %bb1
bb1:
  store i32 %v, i32* %p, align 4
  br i1 %c, label %bb1, label %bb2
bb2:
  ret i32 0
}

; Don't remove "redundant" store if %p is possibly stored to.
define i32 @test46(i1 %c, i32* %p, i32* %p2, i32 %i) {
; CHECK-LABEL: @test46(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
; CHECK-NEXT:    br label [[BB1:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1]], label [[BB2:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    store i32 0, i32* [[P2:%.*]], align 4
; CHECK-NEXT:    br i1 [[C]], label [[BB3:%.*]], label [[BB1]]
; CHECK:       bb3:
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4
  br label %bb1
bb1:
  store i32 %v, i32* %p, align 4
  br i1 %c, label %bb1, label %bb2
bb2:
  store i32 0, i32* %p2, align 4
  br i1 %c, label %bb3, label %bb1
bb3:
  ret i32 0
}

declare void @unknown_func()

; Remove redundant store, which is in the same loop as the load.
define i32 @test33(i1 %c, i32* %p, i32 %i) {
; CHECK-LABEL: @test33(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[BB1:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    br label [[BB2:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    call void @unknown_func()
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1]], label [[BB3:%.*]]
; CHECK:       bb3:
; CHECK-NEXT:    ret i32 0
;
entry:
  br label %bb1
bb1:
  %v = load i32, i32* %p, align 4
  br label %bb2
bb2:
  store i32 %v, i32* %p, align 4
  ; Might read and overwrite value at %p, but doesn't matter.
  call void @unknown_func()
  br i1 %c, label %bb1, label %bb3
bb3:
  ret i32 0
}

declare void @unkown_write(i32*)

; We can't remove the "noop" store around an unknown write.
define void @test43(i32* %Q) {
; CHECK-LABEL: @test43(
; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[Q:%.*]], align 4
; CHECK-NEXT:    call void @unkown_write(i32* [[Q]])
; CHECK-NEXT:    store i32 [[A]], i32* [[Q]], align 4
; CHECK-NEXT:    ret void
;
  %a = load i32, i32* %Q
  call void @unkown_write(i32* %Q)
  store i32 %a, i32* %Q
  ret void
}

; We CAN remove it when the unknown write comes AFTER.
define void @test44(i32* %Q) {
; CHECK-LABEL: @test44(
; CHECK-NEXT:    call void @unkown_write(i32* [[Q:%.*]])
; CHECK-NEXT:    ret void
;
  %a = load i32, i32* %Q
  store i32 %a, i32* %Q
  call void @unkown_write(i32* %Q)
  ret void
}

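; Both stores are removed: the intervening store of 10 is dead, and storing
; the loaded value back is a noop.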
define void @test45(i32* %Q) {
; CHECK-LABEL: @test45(
; CHECK-NEXT:    ret void
;
  %a = load i32, i32* %Q
  store i32 10, i32* %Q
  store i32 %a, i32* %Q
  ret void
}

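; Don't remove the store of the loaded value: bb1 can be reached via bb0,
; where 0 is stored to %p, so %p may no longer contain %v.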
define i32 @test48(i1 %c, i32* %p) {
; CHECK-LABEL: @test48(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB0:%.*]], label [[BB0_0:%.*]]
; CHECK:       bb0:
; CHECK-NEXT:    store i32 0, i32* [[P]], align 4
; CHECK-NEXT:    br i1 [[C]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK:       bb0.0:
; CHECK-NEXT:    br label [[BB1]]
; CHECK:       bb1:
; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
; CHECK-NEXT:    br i1 [[C]], label [[BB2]], label [[BB0]]
; CHECK:       bb2:
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4
  br i1 %c, label %bb0, label %bb0.0

bb0:
  store i32 0, i32* %p
  br i1 %c, label %bb1, label %bb2

bb0.0:
  br label %bb1

bb1:
  store i32 %v, i32* %p, align 4
  br i1 %c, label %bb2, label %bb0
bb2:
  ret i32 0
}

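; Both stores write the loaded value back to %p and nothing else writes to
; %p in between, so both are removed.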
define i32 @test47(i1 %c, i32* %p, i32 %i) {
; CHECK-LABEL: @test47(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[BB1:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1]], label [[BB2:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    br i1 [[C]], label [[BB3:%.*]], label [[BB1]]
; CHECK:       bb3:
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4
  br label %bb1
bb1:
  store i32 %v, i32* %p, align 4
  br i1 %c, label %bb1, label %bb2
bb2:
  store i32 %v, i32* %p, align 4
  br i1 %c, label %bb3, label %bb1
bb3:
  ret i32 0
}

; Test case from PR47887.
define void @test_noalias_store_between_load_and_store(i32* noalias %x, i32* noalias %y) {
; CHECK-LABEL: @test_noalias_store_between_load_and_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i32 0, i32* [[Y:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %lv = load i32, i32* %x, align 4
  store i32 0, i32* %y, align 4
  store i32 %lv, i32* %x, align 4
  ret void
}

; Test case from PR47887. Both the dead `store i32 %inc, i32* %x` and the
; no-op `store i32 %lv, i32* %x` are eliminated (see the checks below), even
; though no-op stores are eliminated before dead stores for the same def.
define void @test_noalias_store_between_load_and_store_elimin_order(i32* noalias %x, i32* noalias %y) {
; CHECK-LABEL: @test_noalias_store_between_load_and_store_elimin_order(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i32 0, i32* [[Y:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %lv = load i32, i32* %x, align 4
  %inc = add nsw i32 %lv, 1
  store i32 %inc, i32* %x, align 4
  store i32 0, i32* %y, align 4
  store i32 %lv, i32* %x, align 4
  ret void
}

; PR50143
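; The store is not removed: a calloc marked inaccessiblememonly cannot be
; assumed to zero the memory it returns.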
define i8* @store_zero_after_calloc_inaccessiblememonly() {
; CHECK-LABEL: @store_zero_after_calloc_inaccessiblememonly(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 1, i64 10) #[[ATTR6:[0-9]+]]
; CHECK-NEXT:    store i8 0, i8* [[CALL]], align 1
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 1, i64 10)  inaccessiblememonly
  store i8 0, i8* %call
  ret i8* %call
}

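; The zero memset is removed, because calloc already returns zero-filled
; memory.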
define i8* @zero_memset_after_calloc()  {
; CHECK-LABEL: @zero_memset_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4)
  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 40000, i1 false)
  ret i8* %call
}

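; A volatile memset must be preserved, even though it only re-zeroes
; calloc'ed memory.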
define i8* @volatile_zero_memset_after_calloc()  {
; CHECK-LABEL: @volatile_zero_memset_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 40000, i1 true)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4)
  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 40000, i1 true)
  ret i8* %call
}

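; The store is overwritten by the full-size memset, which is in turn
; redundant after calloc, so both are removed.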
define i8* @zero_memset_and_store_after_calloc(i8 %v)  {
; CHECK-LABEL: @zero_memset_and_store_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4)
  store i8 %v, i8* %call
  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 40000, i1 false)
  ret i8* %call
}

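; A zero memset of just the first 20 bytes is still redundant after calloc.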
define i8* @partial_zero_memset_after_calloc() {
; CHECK-LABEL: @partial_zero_memset_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4)
  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 20, i1 false)
  ret i8* %call
}

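; The store at offset 30 lies outside the 20-byte memset, so only the memset
; is redundant and removed.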
define i8* @partial_zero_memset_and_store_after_calloc(i8 %v)  {
; CHECK-LABEL: @partial_zero_memset_and_store_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 30
; CHECK-NEXT:    store i8 [[V:%.*]], i8* [[GEP]], align 1
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4)
  %gep = getelementptr inbounds i8, i8* %call, i64 30
  store i8 %v, i8* %gep
  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 20, i1 false)
  ret i8* %call
}

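; The full-size zero memset overwrites the store at the unknown index and is
; itself redundant after calloc, so both are removed.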
define i8* @zero_memset_and_store_with_dyn_index_after_calloc(i8 %v, i64 %idx)  {
; CHECK-LABEL: @zero_memset_and_store_with_dyn_index_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4)
  %gep = getelementptr inbounds i8, i8* %call, i64 %idx
  store i8 %v, i8* %gep
  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 40000, i1 false)
  ret i8* %call
}

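; Nothing is removed: whether the store is dead and whether the 20-byte
; memset is redundant both depend on the unknown index.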
define i8* @partial_zero_memset_and_store_with_dyn_index_after_calloc(i8 %v, i64 %idx)  {
; CHECK-LABEL: @partial_zero_memset_and_store_with_dyn_index_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 [[IDX:%.*]]
; CHECK-NEXT:    store i8 [[V:%.*]], i8* [[GEP]], align 1
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 20, i1 false)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4)
  %gep = getelementptr inbounds i8, i8* %call, i64 %idx
  store i8 %v, i8* %gep
  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 20, i1 false)
  ret i8* %call
}

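; As above, the memset is kept, because the zero-fill of an
; inaccessiblememonly calloc cannot be relied upon.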
define i8* @zero_memset_after_calloc_inaccessiblememonly()  {
; CHECK-LABEL: @zero_memset_after_calloc_inaccessiblememonly(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4) #[[ATTR6]]
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 40000, i1 false)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4) inaccessiblememonly
  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 40000, i1 false)
  ret i8* %call
}

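; A memset storing a non-zero constant is not made redundant by calloc.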
define i8* @cst_nonzero_memset_after_calloc() {
; CHECK-LABEL: @cst_nonzero_memset_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 1, i64 40000, i1 false)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4)
  call void @llvm.memset.p0i8.i64(i8* %call, i8 1, i64 40000, i1 false)
  ret i8* %call
}

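; A memset storing an unknown value is not made redundant by calloc.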
define i8* @nonzero_memset_after_calloc(i8 %v) {
; CHECK-LABEL: @nonzero_memset_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 [[V:%.*]], i64 40000, i1 false)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4)
  call void @llvm.memset.p0i8.i64(i8* %call, i8 %v, i64 40000, i1 false)
  ret i8* %call
}

; PR11896
; The first memset is dead, because calloc provides zero-filled memory.
; TODO: This could be replaced with a call to malloc + memset_pattern16.
define i8* @memset_pattern16_after_calloc(i8* %pat) {
; CHECK-LABEL: @memset_pattern16_after_calloc(
; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
; CHECK-NEXT:    call void @memset_pattern16(i8* [[CALL]], i8* [[PAT:%.*]], i64 40000)
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %call = tail call i8* @calloc(i64 10000, i64 4) #1
  call void @llvm.memset.p0i8.i64(i8* align 4 %call, i8 0, i64 40000, i1 false)
  call void @memset_pattern16(i8* %call, i8* %pat, i64 40000) #1
  ret i8* %call
}
