; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -dse -S | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
declare void @llvm.init.trampoline(i8*, i8*, i8*)

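; The store of %DEAD to %P is overwritten by the store of 0, so both it and
; the load feeding it are deleted.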
define void @test1(i32* %Q, i32* %P) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:    store i32 0, i32* [[P:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %DEAD = load i32, i32* %Q
  store i32 %DEAD, i32* %P
  store i32 0, i32* %P
  ret void
}

; PR8677
@g = global i32 1

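; Nothing can be removed here: %g_addr may alias @g, so the intervening store
; to @g makes the store back of %g_value non-redundant.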
define i32 @test3(i32* %g_addr) nounwind {
; CHECK-LABEL: @test3(
; CHECK-NEXT:    [[G_VALUE:%.*]] = load i32, i32* [[G_ADDR:%.*]], align 4
; CHECK-NEXT:    store i32 -1, i32* @g, align 4
; CHECK-NEXT:    store i32 [[G_VALUE]], i32* [[G_ADDR]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* @g, align 4
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %g_value = load i32, i32* %g_addr, align 4
  store i32 -1, i32* @g, align 4
  store i32 %g_value, i32* %g_addr, align 4
  %tmp3 = load i32, i32* @g, align 4
  ret i32 %tmp3
}

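; Volatile stores are never dead, so both the load and the store must stay.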
define void @test4(i32* %Q) {
; CHECK-LABEL: @test4(
; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[Q:%.*]], align 4
; CHECK-NEXT:    store volatile i32 [[A]], i32* [[Q]], align 4
; CHECK-NEXT:    ret void
;
  %a = load i32, i32* %Q
  store volatile i32 %a, i32* %Q
  ret void
}

; PR8576 - Should delete the store of 10 even though %p and %q may alias.
define void @test2(i32 *%p, i32 *%q) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:    store i32 20, i32* [[Q:%.*]], align 4
; CHECK-NEXT:    store i32 30, i32* [[P:%.*]], align 4
; CHECK-NEXT:    ret void
;
  store i32 10, i32* %p, align 4
  store i32 20, i32* %q, align 4
  store i32 30, i32* %p, align 4
  ret void
}

; Should delete store of 10 even though memset is a may-store to P (P and Q may
; alias).
define void @test6(i32 *%p, i8 *%q) {
; CHECK-LABEL: @test6(
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[Q:%.*]], i8 42, i64 900, i1 false)
; CHECK-NEXT:    store i32 30, i32* [[P:%.*]], align 4
; CHECK-NEXT:    ret void
;
  store i32 10, i32* %p, align 4       ;; dead.
  call void @llvm.memset.p0i8.i64(i8* %q, i8 42, i64 900, i1 false)
  store i32 30, i32* %p, align 4
  ret void
}

; Should delete store of 10 even though memset is a may-store to P (P and Q may
; alias).
define void @test6_atomic(i32* align 4 %p, i8* align 4 %q) {
; CHECK-LABEL: @test6_atomic(
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[Q:%.*]], i8 42, i64 900, i32 4)
; CHECK-NEXT:    store atomic i32 30, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT:    ret void
;
  store atomic i32 10, i32* %p unordered, align 4       ;; dead.
  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %q, i8 42, i64 900, i32 4)
  store atomic i32 30, i32* %p unordered, align 4
  ret void
}

; Should delete store of 10 even though memcpy is a may-store to P (P and Q may
; alias).
define void @test7(i32 *%p, i8 *%q, i8* noalias %r) {
; CHECK-LABEL: @test7(
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[Q:%.*]], i8* [[R:%.*]], i64 900, i1 false)
; CHECK-NEXT:    store i32 30, i32* [[P:%.*]], align 4
; CHECK-NEXT:    ret void
;
  store i32 10, i32* %p, align 4       ;; dead.
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %r, i64 900, i1 false)
  store i32 30, i32* %p, align 4
  ret void
}

; Should delete store of 10 even though memcpy is a may-store to P (P and Q may
; alias).
define void @test7_atomic(i32* align 4 %p, i8* align 4 %q, i8* noalias align 4 %r) {
; CHECK-LABEL: @test7_atomic(
; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 [[Q:%.*]], i8* align 4 [[R:%.*]], i64 900, i32 4)
; CHECK-NEXT:    store atomic i32 30, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT:    ret void
;
  store atomic i32 10, i32* %p unordered, align 4       ;; dead.
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %q, i8* align 4 %r, i64 900, i32 4)
  store atomic i32 30, i32* %p unordered, align 4
  ret void
}

; Do not delete stores that are only partially killed.
define i32 @test8() {
; CHECK-LABEL: @test8(
; CHECK-NEXT:    [[V:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store i32 1234567, i32* [[V]], align 4
; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[V]], align 4
; CHECK-NEXT:    ret i32 [[X]]
;
  %V = alloca i32
  store i32 1234567, i32* %V
  %V2 = bitcast i32* %V to i8*
  store i8 0, i8* %V2
  %X = load i32, i32* %V
  ret i32 %X
}

; Test for byval handling.
%struct.x = type { i32, i32, i32, i32 }
define void @test9(%struct.x* byval(%struct.x) %a) nounwind {
; CHECK-LABEL: @test9(
; CHECK-NEXT:    ret void
;
  %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
  store i32 1, i32* %tmp2, align 4
  ret void
}

; Test for inalloca handling.
define void @test9_2(%struct.x* inalloca(%struct.x) %a) nounwind {
; CHECK-LABEL: @test9_2(
; CHECK-NEXT:    ret void
;
  %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
  store i32 1, i32* %tmp2, align 4
  ret void
}

; Test for preallocated handling.
define void @test9_3(%struct.x* preallocated(%struct.x) %a) nounwind {
; CHECK-LABEL: @test9_3(
; CHECK-NEXT:    ret void
;
  %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
  store i32 1, i32* %tmp2, align 4
  ret void
}

; va_arg has a fuzzy dependence; the store shouldn't be zapped.
define double @test10(i8* %X) {
; CHECK-LABEL: @test10(
; CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i8*, align 8
; CHECK-NEXT:    store i8* [[X:%.*]], i8** [[X_ADDR]], align 8
; CHECK-NEXT:    [[TMP_0:%.*]] = va_arg i8** [[X_ADDR]], double
; CHECK-NEXT:    ret double [[TMP_0]]
;
  %X_addr = alloca i8*
  store i8* %X, i8** %X_addr
  %tmp.0 = va_arg i8** %X_addr, double
  ret double %tmp.0
}

; DSE should delete the dead trampoline.
declare void @test11f()
define void @test11() {
; CHECK-LABEL: @test11(
; CHECK-NEXT:    ret void
;
  %storage = alloca [10 x i8], align 16
  %cast = getelementptr [10 x i8], [10 x i8]* %storage, i32 0, i32 0
  call void @llvm.init.trampoline(i8* %cast, i8* bitcast (void ()* @test11f to i8*), i8* null)
  ret void
}

; %P doesn't escape, so the DEAD instructions should be removed.
declare void @test13f()
define i32* @test13() {
; CHECK-LABEL: @test13(
; CHECK-NEXT:    [[PTR:%.*]] = tail call i8* @malloc(i32 4)
; CHECK-NEXT:    [[P:%.*]] = bitcast i8* [[PTR]] to i32*
; CHECK-NEXT:    call void @test13f()
; CHECK-NEXT:    store i32 0, i32* [[P]], align 4
; CHECK-NEXT:    ret i32* [[P]]
;
  %ptr = tail call i8* @malloc(i32 4)
  %P = bitcast i8* %ptr to i32*
  %DEAD = load i32, i32* %P
  %DEAD2 = add i32 %DEAD, 1
  store i32 %DEAD2, i32* %P
  call void @test13f()
  store i32 0, i32* %P
  ret i32* %P
}

define i32 addrspace(1)* @test13_addrspacecast() {
; CHECK-LABEL: @test13_addrspacecast(
; CHECK-NEXT:    [[P:%.*]] = tail call i8* @malloc(i32 4)
; CHECK-NEXT:    [[P_BC:%.*]] = bitcast i8* [[P]] to i32*
; CHECK-NEXT:    [[P1:%.*]] = addrspacecast i32* [[P_BC]] to i32 addrspace(1)*
; CHECK-NEXT:    call void @test13f()
; CHECK-NEXT:    store i32 0, i32 addrspace(1)* [[P1]], align 4
; CHECK-NEXT:    ret i32 addrspace(1)* [[P1]]
;
  %p = tail call i8* @malloc(i32 4)
  %p.bc = bitcast i8* %p to i32*
  %P = addrspacecast i32* %p.bc to i32 addrspace(1)*
  %DEAD = load i32, i32 addrspace(1)* %P
  %DEAD2 = add i32 %DEAD, 1
  store i32 %DEAD2, i32 addrspace(1)* %P
  call void @test13f()
  store i32 0, i32 addrspace(1)* %P
  ret i32 addrspace(1)* %P
}

declare noalias i8* @malloc(i32) willreturn
declare noalias i8* @calloc(i32, i32) willreturn

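; The store is dead because the local alloca %P is never read.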
define void @test14(i32* %Q) {
; CHECK-LABEL: @test14(
; CHECK-NEXT:    ret void
;
  %P = alloca i32
  %DEAD = load i32, i32* %Q
  store i32 %DEAD, i32* %P
  ret void
}

; The store here is not dead because the byval call reads it.
declare void @test19f({i32}* byval({i32}) align 4 %P)

define void @test19({i32}* nocapture byval({i32}) align 4 %arg5) nounwind ssp {
; CHECK-LABEL: @test19(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds { i32 }, { i32 }* [[ARG5:%.*]], i32 0, i32 0
; CHECK-NEXT:    store i32 912, i32* [[TMP7]], align 4
; CHECK-NEXT:    call void @test19f({ i32 }* byval({ i32 }) align 4 [[ARG5]])
; CHECK-NEXT:    ret void
;
bb:
  %tmp7 = getelementptr inbounds {i32}, {i32}* %arg5, i32 0, i32 0
  store i32 912, i32* %tmp7
  call void @test19f({i32}* byval({i32}) align 4 %arg5)
  ret void
}

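; A store to malloc'ed memory that is never read is dead.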
define void @test20() {
; CHECK-LABEL: @test20(
; CHECK-NEXT:    ret void
;
  %m = call i8* @malloc(i32 24)
  store i8 0, i8* %m
  ret void
}

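; Same for calloc'ed memory.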
define void @test21() {
; CHECK-LABEL: @test21(
; CHECK-NEXT:    ret void
;
  %m = call i8* @calloc(i32 9, i32 7)
  store i8 0, i8* %m
  ret void
}

; Currently, elimination of stores at the end of a function is limited to a
; single underlying object, to keep compile time down. This case appears not
; to be very important in practice.
define void @test22(i1 %i, i32 %k, i32 %m) nounwind {
; CHECK-LABEL: @test22(
; CHECK-NEXT:    [[K_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[M_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[K_ADDR_M_ADDR:%.*]] = select i1 [[I:%.*]], i32* [[K_ADDR]], i32* [[M_ADDR]]
; CHECK-NEXT:    store i32 0, i32* [[K_ADDR_M_ADDR]], align 4
; CHECK-NEXT:    ret void
;
  %k.addr = alloca i32
  %m.addr = alloca i32
  %k.addr.m.addr = select i1 %i, i32* %k.addr, i32* %m.addr
  store i32 0, i32* %k.addr.m.addr, align 4
  ret void
}

; PR13547
declare noalias i8* @strdup(i8* nocapture) nounwind
define noalias i8* @test23() nounwind uwtable ssp {
; CHECK-LABEL: @test23(
; CHECK-NEXT:    [[X:%.*]] = alloca [2 x i8], align 1
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8], [2 x i8]* [[X]], i64 0, i64 0
; CHECK-NEXT:    store i8 97, i8* [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i8], [2 x i8]* [[X]], i64 0, i64 1
; CHECK-NEXT:    store i8 0, i8* [[ARRAYIDX1]], align 1
; CHECK-NEXT:    [[CALL:%.*]] = call i8* @strdup(i8* [[ARRAYIDX]]) [[ATTR3:#.*]]
; CHECK-NEXT:    ret i8* [[CALL]]
;
  %x = alloca [2 x i8], align 1
  %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %x, i64 0, i64 0
  store i8 97, i8* %arrayidx, align 1
  %arrayidx1 = getelementptr inbounds [2 x i8], [2 x i8]* %x, i64 0, i64 1
  store i8 0, i8* %arrayidx1, align 1
  %call = call i8* @strdup(i8* %arrayidx) nounwind
  ret i8* %call
}

; Make sure the same-sized store to the later element is deleted as well.
define void @test24([2 x i32]* %a, i32 %b, i32 %c) nounwind {
; CHECK-LABEL: @test24(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A:%.*]], i64 0, i64 0
; CHECK-NEXT:    store i32 [[B:%.*]], i32* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 1
; CHECK-NEXT:    store i32 [[C:%.*]], i32* [[TMP2]], align 4
; CHECK-NEXT:    ret void
;
  %1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
  store i32 0, i32* %1, align 4
  %2 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
  store i32 0, i32* %2, align 4
  %3 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
  store i32 %b, i32* %3, align 4
  %4 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
  store i32 %c, i32* %4, align 4
  ret void
}

; Check another case like PR13547 where strdup is not like malloc: it reads
; its argument, so the store of 0 before the call must stay.
define i8* @test25(i8* %p) nounwind {
; CHECK-LABEL: @test25(
; CHECK-NEXT:    [[P_4:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 4
; CHECK-NEXT:    [[TMP:%.*]] = load i8, i8* [[P_4]], align 1
; CHECK-NEXT:    store i8 0, i8* [[P_4]], align 1
; CHECK-NEXT:    [[Q:%.*]] = call i8* @strdup(i8* [[P]]) [[ATTR6:#.*]]
; CHECK-NEXT:    store i8 [[TMP]], i8* [[P_4]], align 1
; CHECK-NEXT:    ret i8* [[Q]]
;
  %p.4 = getelementptr i8, i8* %p, i64 4
  %tmp = load i8, i8* %p.4, align 1
  store i8 0, i8* %p.4, align 1
  %q = call i8* @strdup(i8* %p) nounwind optsize
  store i8 %tmp, i8* %p.4, align 1
  ret i8* %q
}

; Don't remove the redundant store because of the intervening may-alias store.
define i32 @test28(i1 %c, i32* %p, i32* %p2, i32 %i) {
; CHECK-LABEL: @test28(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
; CHECK-NEXT:    store i32 [[I:%.*]], i32* [[P2:%.*]], align 4
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    br label [[BB3:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    br label [[BB3]]
; CHECK:       bb3:
; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4

  ; Might overwrite value at %p
  store i32 %i, i32* %p2, align 4
  br i1 %c, label %bb1, label %bb2
bb1:
  br label %bb3
bb2:
  br label %bb3
bb3:
  store i32 %v, i32* %p, align 4
  ret i32 0
}

; Don't remove the redundant store because of the intervening may-alias store.
define i32 @test29(i1 %c, i32* %p, i32* %p2, i32 %i) {
; CHECK-LABEL: @test29(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    br label [[BB3:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    store i32 [[I:%.*]], i32* [[P2:%.*]], align 4
; CHECK-NEXT:    br label [[BB3]]
; CHECK:       bb3:
; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4
  br i1 %c, label %bb1, label %bb2
bb1:
  br label %bb3
bb2:
  ; Might overwrite value at %p
  store i32 %i, i32* %p2, align 4
  br label %bb3
bb3:
  store i32 %v, i32* %p, align 4
  ret i32 0
}

declare void @unknown_func()

; Don't remove the redundant store because of the intervening unknown call.
define i32 @test30(i1 %c, i32* %p, i32 %i) {
; CHECK-LABEL: @test30(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    br label [[BB3:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    call void @unknown_func()
; CHECK-NEXT:    br label [[BB3]]
; CHECK:       bb3:
; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4
  br i1 %c, label %bb1, label %bb2
bb1:
  br label %bb3
bb2:
  ; Might overwrite value at %p
  call void @unknown_func()
  br label %bb3
bb3:
  store i32 %v, i32* %p, align 4
  ret i32 0
}

; Don't remove the redundant store in a loop when a call in the loop may read
; and overwrite %p.
define i32 @test32(i1 %c, i32* %p, i32 %i) {
; CHECK-LABEL: @test32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
; CHECK-NEXT:    br label [[BB1:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
; CHECK-NEXT:    call void @unknown_func()
; CHECK-NEXT:    br i1 undef, label [[BB1]], label [[BB2:%.*]]
; CHECK:       bb2:
; CHECK-NEXT:    ret i32 0
;
entry:
  %v = load i32, i32* %p, align 4
  br label %bb1
bb1:
  store i32 %v, i32* %p, align 4
  ; Might read and overwrite value at %p
  call void @unknown_func()
  br i1 undef, label %bb1, label %bb2
bb2:
  ret i32 0
}

; We cannot remove any stores, because @unknown_func may unwind and the caller
; may read %p while unwinding.
define void @test34(i32* noalias %p) {
; CHECK-LABEL: @test34(
; CHECK-NEXT:    store i32 1, i32* [[P:%.*]], align 4
; CHECK-NEXT:    call void @unknown_func()
; CHECK-NEXT:    store i32 0, i32* [[P]], align 4
; CHECK-NEXT:    ret void
;
  store i32 1, i32* %p
  call void @unknown_func()
  store i32 0, i32* %p
  ret void
}

; Remove redundant store even with an unwinding function in the same block.
define void @test35(i32* noalias %p) {
; CHECK-LABEL: @test35(
; CHECK-NEXT:    call void @unknown_func()
; CHECK-NEXT:    store i32 0, i32* [[P:%.*]], align 4
; CHECK-NEXT:    ret void
;
  call void @unknown_func()
  store i32 1, i32* %p
  store i32 0, i32* %p
  ret void
}

; We cannot optimize away the first memmove since %P could overlap with %Q.
define void @test36(i8* %P, i8* %Q) {
; CHECK-LABEL: @test36(
; CHECK-NEXT:    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
; CHECK-NEXT:    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P]], i8* [[Q]], i64 12, i1 false)
; CHECK-NEXT:    ret void
;
  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
  ret void
}

define void @test36_atomic(i8* %P, i8* %Q) {
; CHECK-LABEL: @test36_atomic(
; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
; CHECK-NEXT:    ret void
;
  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
  ret void
}

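; The memcpy is not dead: %R may overlap %P, so the memmove may read the bytes
; the memcpy wrote.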
define void @test37(i8* %P, i8* %Q, i8* %R) {
; CHECK-LABEL: @test37(
; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
; CHECK-NEXT:    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false)
; CHECK-NEXT:    ret void
;
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
  ret void
}

define void @test37_atomic(i8* %P, i8* %Q, i8* %R) {
; CHECK-LABEL: @test37_atomic(
; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1)
; CHECK-NEXT:    ret void
;
  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 12, i32 1)
  ret void
}

; See PR11763 - LLVM allows memcpy's source and destination to be equal (but not
; unequal and overlapping).
define void @test38(i8* %P, i8* %Q, i8* %R) {
; CHECK-LABEL: @test38(
; CHECK-NEXT:    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false)
; CHECK-NEXT:    ret void
;
  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
  ret void
}

; See PR11763 - LLVM allows memcpy's source and destination to be equal (but not
; unequal and overlapping).
define void @test38_atomic(i8* %P, i8* %Q, i8* %R) {
; CHECK-LABEL: @test38_atomic(
; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1)
; CHECK-NEXT:    ret void
;
  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 12, i32 1)
  ret void
}

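; The second memcpy overwrites only 8 of the first one's 12 bytes, so the
; first memcpy must stay.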
define void @test39(i8* %P, i8* %Q, i8* %R) {
; CHECK-LABEL: @test39(
; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 8, i1 false)
; CHECK-NEXT:    ret void
;
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 8, i1 false)
  ret void
}

define void @test39_atomic(i8* %P, i8* %Q, i8* %R) {
; CHECK-LABEL: @test39_atomic(
; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 8, i32 1)
; CHECK-NEXT:    ret void
;
  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 8, i32 1)
  ret void
}

declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32)

declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) nounwind
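; The copy into %A is not dead: the volatile memcpy reads %A back before its
; lifetime ends.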
define void @test40(i32** noalias %Pp, i32* noalias %Q) {
; CHECK-LABEL: @test40(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[AC:%.*]] = bitcast i32* [[A]] to i8*
; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[AC]])
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32** [[PP:%.*]] to i8**
; CHECK-NEXT:    [[PC:%.*]] = load i8*, i8** [[TMP0]], align 8
; CHECK-NEXT:    [[QC:%.*]] = bitcast i32* [[Q:%.*]] to i8*
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 [[AC]], i8* align 4 [[QC]], i64 4, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[PC]], i8* nonnull align 4 [[AC]], i64 4, i1 true)
; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[AC]])
; CHECK-NEXT:    ret void
;
entry:
  %A = alloca i32, align 4
  %Ac = bitcast i32* %A to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %Ac)
  %0 = bitcast i32** %Pp to i8**
  %Pc = load i8*, i8** %0, align 8
  %Qc = bitcast i32* %Q to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %Ac, i8* align 4 %Qc, i64 4, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %Pc, i8* nonnull align 4 %Ac, i64 4, i1 true)
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %Ac)
  ret void
}

declare void @free(i8* nocapture)

; We cannot remove `store i32 1, i32* %P`, because @unknown_func may unwind
; and the caller may read %P while unwinding.
define void @test41(i32* noalias %P) {
; CHECK-LABEL: @test41(
; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8*
; CHECK-NEXT:    store i32 1, i32* [[P]], align 4
; CHECK-NEXT:    call void @unknown_func()
; CHECK-NEXT:    call void @free(i8* [[P2]])
; CHECK-NEXT:    ret void
;
  %P2 = bitcast i32* %P to i8*
  store i32 1, i32* %P
  call void @unknown_func()
  store i32 2, i32* %P
  call void @free(i8* %P2)
  ret void
}

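; The i8 store only partially overwrites the i32 store, and the intervening
; store to %Q may alias %P, so all three stores must stay.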
define void @test42(i32* %P, i32* %Q) {
; CHECK-LABEL: @test42(
; CHECK-NEXT:    store i32 1, i32* [[P:%.*]], align 4
; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P]] to i8*
; CHECK-NEXT:    store i32 2, i32* [[Q:%.*]], align 4
; CHECK-NEXT:    store i8 3, i8* [[P2]], align 1
; CHECK-NEXT:    ret void
;
  store i32 1, i32* %P
  %P2 = bitcast i32* %P to i8*
  store i32 2, i32* %Q
  store i8 3, i8* %P2
  ret void
}

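; Same as test42, but with unordered atomic stores.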
define void @test42a(i32* %P, i32* %Q) {
; CHECK-LABEL: @test42a(
; CHECK-NEXT:    store atomic i32 1, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P]] to i8*
; CHECK-NEXT:    store atomic i32 2, i32* [[Q:%.*]] unordered, align 4
; CHECK-NEXT:    store atomic i8 3, i8* [[P2]] unordered, align 4
; CHECK-NEXT:    ret void
;
  store atomic i32 1, i32* %P unordered, align 4
  %P2 = bitcast i32* %P to i8*
  store atomic i32 2, i32* %Q unordered, align 4
  store atomic i8 3, i8* %P2 unordered, align 4
  ret void
}

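; With noalias %Q, the one-byte store is folded into the earlier i32 store:
; on this big-endian target, overwriting the first byte of i32 1 with 3 gives
; 0x03000001 = 50331649.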
define void @test43a(i32* %P, i32* noalias %Q) {
; CHECK-LABEL: @test43a(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store atomic i32 50331649, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT:    store atomic i32 2, i32* [[Q:%.*]] unordered, align 4
; CHECK-NEXT:    ret void
;
entry:
  store atomic i32 1, i32* %P unordered, align 4
  %P2 = bitcast i32* %P to i8*
  store atomic i32 2, i32* %Q unordered, align 4
  store atomic i8 3, i8* %P2 unordered, align 4
  ret void
}

; Some tests where volatile may block removing a store.

; Here we can remove the first non-volatile store. We cannot remove the
; volatile store.
define void @test44_volatile(i32* %P) {
; CHECK-LABEL: @test44_volatile(
; CHECK-NEXT:    store volatile i32 2, i32* [[P:%.*]], align 4
; CHECK-NEXT:    store i32 3, i32* [[P]], align 4
; CHECK-NEXT:    ret void
;
  store i32 1, i32* %P, align 4
  store volatile i32 2, i32* %P, align 4
  store i32 3, i32* %P, align 4
  ret void
}

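; The leading non-volatile store is dead; both volatile stores must stay.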
define void @test45_volatile(i32* %P) {
; CHECK-LABEL: @test45_volatile(
; CHECK-NEXT:    store volatile i32 2, i32* [[P:%.*]], align 4
; CHECK-NEXT:    store volatile i32 3, i32* [[P]], align 4
; CHECK-NEXT:    ret void
;
  store i32 1, i32* %P, align 4
  store volatile i32 2, i32* %P, align 4
  store volatile i32 3, i32* %P, align 4
  ret void
}

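; The non-volatile store between the two volatile stores is dead.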
define void @test46_volatile(i32* %P) {
; CHECK-LABEL: @test46_volatile(
; CHECK-NEXT:    store volatile i32 2, i32* [[P:%.*]], align 4
; CHECK-NEXT:    store volatile i32 3, i32* [[P]], align 4
; CHECK-NEXT:    ret void
;
  store volatile i32 2, i32* %P, align 4
  store i32 1, i32* %P, align 4
  store volatile i32 3, i32* %P, align 4
  ret void
}

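; Neither volatile store can be removed.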
define void @test47_volatile(i32* %P) {
; CHECK-LABEL: @test47_volatile(
; CHECK-NEXT:    store volatile i32 2, i32* [[P:%.*]], align 4
; CHECK-NEXT:    store volatile i32 3, i32* [[P]], align 4
; CHECK-NEXT:    ret void
;
  store volatile i32 2, i32* %P, align 4
  store volatile i32 3, i32* %P, align 4
  ret void
}

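; The first store to the noalias pointer %Q is dead: the intervening store to
; %P cannot alias it, and the store of 3 overwrites it.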
define i32 @test48(i32* %P, i32* noalias %Q, i32* %R) {
; CHECK-LABEL: @test48(
; CHECK-NEXT:    store i32 2, i32* [[P:%.*]], align 4
; CHECK-NEXT:    store i32 3, i32* [[Q:%.*]], align 4
; CHECK-NEXT:    [[L:%.*]] = load i32, i32* [[R:%.*]], align 4
; CHECK-NEXT:    ret i32 [[L]]
;
  store i32 1, i32* %Q
  store i32 2, i32* %P
  store i32 3, i32* %Q
  %l = load i32, i32* %R
  ret i32 %l
}