1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefix=NO_MSSA
3; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefix=MSSA
4
5; Test whether memcpy-memcpy dependence is optimized across
6; basic blocks (conditional branches and invokes).
7; TODO: This is only supported when MemorySSA is enabled; not without it yet.
8
9%struct.s = type { i32, i32 }
10
11@s_foo = private unnamed_addr constant %struct.s { i32 1, i32 2 }, align 4
12@s_baz = private unnamed_addr constant %struct.s { i32 1, i32 2 }, align 4
13@i = external constant i8*
14
15declare void @qux()
16declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
17declare void @__cxa_throw(i8*, i8*, i8*)
18declare i32 @__gxx_personality_v0(...)
19declare i8* @__cxa_begin_catch(i8*)
20
21; A simple partial redundancy. Test that the second memcpy is optimized
22; to copy directly from the original source rather than from the temporary.
23
24define void @wobble(i8* noalias %dst, i8* %src, i1 %some_condition) {
25; NO_MSSA-LABEL: @wobble(
26; NO_MSSA-NEXT:  bb:
27; NO_MSSA-NEXT:    [[TEMP:%.*]] = alloca i8, i32 64, align 1
28; NO_MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false)
29; NO_MSSA-NEXT:    br i1 [[SOME_CONDITION:%.*]], label [[MORE:%.*]], label [[OUT:%.*]]
30; NO_MSSA:       out:
31; NO_MSSA-NEXT:    call void @qux()
32; NO_MSSA-NEXT:    unreachable
33; NO_MSSA:       more:
34; NO_MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[TEMP]], i64 64, i1 false)
35; NO_MSSA-NEXT:    ret void
36;
37; MSSA-LABEL: @wobble(
38; MSSA-NEXT:  bb:
39; MSSA-NEXT:    [[TEMP:%.*]] = alloca i8, i32 64, align 1
40; MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false)
41; MSSA-NEXT:    br i1 [[SOME_CONDITION:%.*]], label [[MORE:%.*]], label [[OUT:%.*]]
42; MSSA:       out:
43; MSSA-NEXT:    call void @qux()
44; MSSA-NEXT:    unreachable
45; MSSA:       more:
46; MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[SRC]], i64 64, i1 false)
47; MSSA-NEXT:    ret void
48;
; Input IR: %temp is a 64 x i8 stack buffer that is filled from %src in the
; entry block and copied on to %dst only on the %more path.
49bb:
50  %temp = alloca i8, i32 64
  ; First copy: 64 bytes from %src into the temporary.
51  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %temp, i8* nonnull align 8 %src, i64 64, i1 false)
52  br i1 %some_condition, label %more, label %out
53
54out:
  ; This path does not copy out of %temp; it calls @qux and ends in unreachable.
55  call void @qux()
56  unreachable
57
58more:
  ; Second copy: 64 bytes from the temporary into %dst. The MemorySSA run
  ; expects the source operand to be rewritten to %src; the run without
  ; MemorySSA expects this call to stay as written.
59  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %temp, i64 64, i1 false)
60  ret void
61}
62
63; A CFG triangle with a partial redundancy targeting an alloca. Test that the
64; memcpy inside the triangle is optimized to copy directly from the original
65; source rather than from the temporary.
66
67define i32 @foo(i1 %t3) {
68; NO_MSSA-LABEL: @foo(
69; NO_MSSA-NEXT:  bb:
70; NO_MSSA-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
71; NO_MSSA-NEXT:    [[T:%.*]] = alloca [[STRUCT_S]], align 4
72; NO_MSSA-NEXT:    [[S1:%.*]] = bitcast %struct.s* [[S]] to i8*
73; NO_MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[S1]], i8* align 4 bitcast (%struct.s* @s_foo to i8*), i64 8, i1 false)
74; NO_MSSA-NEXT:    br i1 [[T3:%.*]], label [[BB4:%.*]], label [[BB7:%.*]]
75; NO_MSSA:       bb4:
76; NO_MSSA-NEXT:    [[T5:%.*]] = bitcast %struct.s* [[T]] to i8*
77; NO_MSSA-NEXT:    [[S6:%.*]] = bitcast %struct.s* [[S]] to i8*
78; NO_MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T5]], i8* align 4 [[S6]], i64 8, i1 false)
79; NO_MSSA-NEXT:    br label [[BB7]]
80; NO_MSSA:       bb7:
81; NO_MSSA-NEXT:    [[T8:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 0
82; NO_MSSA-NEXT:    [[T9:%.*]] = load i32, i32* [[T8]], align 4
83; NO_MSSA-NEXT:    [[T10:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 1
84; NO_MSSA-NEXT:    [[T11:%.*]] = load i32, i32* [[T10]], align 4
85; NO_MSSA-NEXT:    [[T12:%.*]] = add i32 [[T9]], [[T11]]
86; NO_MSSA-NEXT:    ret i32 [[T12]]
87;
88; MSSA-LABEL: @foo(
89; MSSA-NEXT:  bb:
90; MSSA-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
91; MSSA-NEXT:    [[T:%.*]] = alloca [[STRUCT_S]], align 4
92; MSSA-NEXT:    [[S1:%.*]] = bitcast %struct.s* [[S]] to i8*
93; MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[S1]], i8* align 4 bitcast (%struct.s* @s_foo to i8*), i64 8, i1 false)
94; MSSA-NEXT:    br i1 [[T3:%.*]], label [[BB4:%.*]], label [[BB7:%.*]]
95; MSSA:       bb4:
96; MSSA-NEXT:    [[T5:%.*]] = bitcast %struct.s* [[T]] to i8*
97; MSSA-NEXT:    [[S6:%.*]] = bitcast %struct.s* [[S]] to i8*
98; MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T5]], i8* align 4 bitcast (%struct.s* @s_foo to i8*), i64 8, i1 false)
99; MSSA-NEXT:    br label [[BB7]]
100; MSSA:       bb7:
101; MSSA-NEXT:    [[T8:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 0
102; MSSA-NEXT:    [[T9:%.*]] = load i32, i32* [[T8]], align 4
103; MSSA-NEXT:    [[T10:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 1
104; MSSA-NEXT:    [[T11:%.*]] = load i32, i32* [[T10]], align 4
105; MSSA-NEXT:    [[T12:%.*]] = add i32 [[T9]], [[T11]]
106; MSSA-NEXT:    ret i32 [[T12]]
107;
; Input IR: %s is initialised from the constant @s_foo in the entry block, and
; %s is copied into %t only inside the triangle (block %bb4).
108bb:
109  %s = alloca %struct.s, align 4
110  %t = alloca %struct.s, align 4
111  %s1 = bitcast %struct.s* %s to i8*
  ; First copy: 8 bytes from the constant @s_foo into %s.
112  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %s1, i8* align 4 bitcast (%struct.s* @s_foo to i8*), i64 8, i1 false)
113  br i1 %t3, label %bb4, label %bb7
114
115bb4:                                              ; preds = %bb
116  %t5 = bitcast %struct.s* %t to i8*
117  %s6 = bitcast %struct.s* %s to i8*
  ; Second copy: 8 bytes from %s into %t. The MemorySSA run expects the source
  ; operand to become @s_foo; the run without MemorySSA expects %s6 to remain
  ; the source.
118  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %t5, i8* align 4 %s6, i64 8, i1 false)
119  br label %bb7
120
121bb7:                                              ; preds = %bb4, %bb
  ; Join point: load both i32 fields of %t and return their sum. %t is written
  ; only on the path through %bb4.
122  %t8 = getelementptr %struct.s, %struct.s* %t, i32 0, i32 0
123  %t9 = load i32, i32* %t8, align 4
124  %t10 = getelementptr %struct.s, %struct.s* %t, i32 0, i32 1
125  %t11 = load i32, i32* %t10, align 4
126  %t12 = add i32 %t9, %t11
127  ret i32 %t12
128}
129
130; A CFG diamond with an invoke on one side, and a partially redundant memcpy
131; into an alloca on the other. Test that the memcpy inside the diamond is
132; optimized to copy directly from the original source rather than from the
133; temporary. This more complex test represents a relatively common usage
134; pattern.
135
136define i32 @baz(i1 %t5) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
137; NO_MSSA-LABEL: @baz(
138; NO_MSSA-NEXT:  bb:
139; NO_MSSA-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
140; NO_MSSA-NEXT:    [[T:%.*]] = alloca [[STRUCT_S]], align 4
141; NO_MSSA-NEXT:    [[S3:%.*]] = bitcast %struct.s* [[S]] to i8*
142; NO_MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[S3]], i8* align 4 bitcast (%struct.s* @s_baz to i8*), i64 8, i1 false)
143; NO_MSSA-NEXT:    br i1 [[T5:%.*]], label [[BB6:%.*]], label [[BB22:%.*]]
144; NO_MSSA:       bb6:
145; NO_MSSA-NEXT:    invoke void @__cxa_throw(i8* null, i8* bitcast (i8** @i to i8*), i8* null)
146; NO_MSSA-NEXT:    to label [[BB25:%.*]] unwind label [[BB9:%.*]]
147; NO_MSSA:       bb9:
148; NO_MSSA-NEXT:    [[T10:%.*]] = landingpad { i8*, i32 }
149; NO_MSSA-NEXT:    catch i8* null
150; NO_MSSA-NEXT:    br label [[BB13:%.*]]
151; NO_MSSA:       bb13:
152; NO_MSSA-NEXT:    [[T15:%.*]] = call i8* @__cxa_begin_catch(i8* null)
153; NO_MSSA-NEXT:    br label [[BB23:%.*]]
154; NO_MSSA:       bb22:
155; NO_MSSA-NEXT:    [[T23:%.*]] = bitcast %struct.s* [[T]] to i8*
156; NO_MSSA-NEXT:    [[S24:%.*]] = bitcast %struct.s* [[S]] to i8*
157; NO_MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T23]], i8* align 4 [[S24]], i64 8, i1 false)
158; NO_MSSA-NEXT:    br label [[BB23]]
159; NO_MSSA:       bb23:
160; NO_MSSA-NEXT:    [[T17:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 0
161; NO_MSSA-NEXT:    [[T18:%.*]] = load i32, i32* [[T17]], align 4
162; NO_MSSA-NEXT:    [[T19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 1
163; NO_MSSA-NEXT:    [[T20:%.*]] = load i32, i32* [[T19]], align 4
164; NO_MSSA-NEXT:    [[T21:%.*]] = add nsw i32 [[T18]], [[T20]]
165; NO_MSSA-NEXT:    ret i32 [[T21]]
166; NO_MSSA:       bb25:
167; NO_MSSA-NEXT:    unreachable
168;
169; MSSA-LABEL: @baz(
170; MSSA-NEXT:  bb:
171; MSSA-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
172; MSSA-NEXT:    [[T:%.*]] = alloca [[STRUCT_S]], align 4
173; MSSA-NEXT:    [[S3:%.*]] = bitcast %struct.s* [[S]] to i8*
174; MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[S3]], i8* align 4 bitcast (%struct.s* @s_baz to i8*), i64 8, i1 false)
175; MSSA-NEXT:    br i1 [[T5:%.*]], label [[BB6:%.*]], label [[BB22:%.*]]
176; MSSA:       bb6:
177; MSSA-NEXT:    invoke void @__cxa_throw(i8* null, i8* bitcast (i8** @i to i8*), i8* null)
178; MSSA-NEXT:    to label [[BB25:%.*]] unwind label [[BB9:%.*]]
179; MSSA:       bb9:
180; MSSA-NEXT:    [[T10:%.*]] = landingpad { i8*, i32 }
181; MSSA-NEXT:    catch i8* null
182; MSSA-NEXT:    br label [[BB13:%.*]]
183; MSSA:       bb13:
184; MSSA-NEXT:    [[T15:%.*]] = call i8* @__cxa_begin_catch(i8* null)
185; MSSA-NEXT:    br label [[BB23:%.*]]
186; MSSA:       bb22:
187; MSSA-NEXT:    [[T23:%.*]] = bitcast %struct.s* [[T]] to i8*
188; MSSA-NEXT:    [[S24:%.*]] = bitcast %struct.s* [[S]] to i8*
189; MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T23]], i8* align 4 bitcast (%struct.s* @s_baz to i8*), i64 8, i1 false)
190; MSSA-NEXT:    br label [[BB23]]
191; MSSA:       bb23:
192; MSSA-NEXT:    [[T17:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 0
193; MSSA-NEXT:    [[T18:%.*]] = load i32, i32* [[T17]], align 4
194; MSSA-NEXT:    [[T19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 1
195; MSSA-NEXT:    [[T20:%.*]] = load i32, i32* [[T19]], align 4
196; MSSA-NEXT:    [[T21:%.*]] = add nsw i32 [[T18]], [[T20]]
197; MSSA-NEXT:    ret i32 [[T21]]
198; MSSA:       bb25:
199; MSSA-NEXT:    unreachable
200;
; Input IR: %s is initialised from the constant @s_baz in the entry block. One
; side of the diamond (%bb6) invokes @__cxa_throw; the other side (%bb22)
; copies %s into %t. Both sides rejoin at %bb23.
201bb:
202  %s = alloca %struct.s, align 4
203  %t = alloca %struct.s, align 4
204  %s3 = bitcast %struct.s* %s to i8*
  ; First copy: 8 bytes from the constant @s_baz into %s.
205  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %s3, i8* align 4 bitcast (%struct.s* @s_baz to i8*), i64 8, i1 false)
206  br i1 %t5, label %bb6, label %bb22
207
208bb6:                                              ; preds = %bb
  ; Invoke side of the diamond: the normal destination %bb25 is an unreachable
  ; block, and the unwind destination is the landing pad in %bb9.
209  invoke void @__cxa_throw(i8* null, i8* bitcast (i8** @i to i8*), i8* null)
210  to label %bb25 unwind label %bb9
211
212bb9:                                              ; preds = %bb6
  ; Landing pad for the invoke above, with a single `catch i8* null` clause.
213  %t10 = landingpad { i8*, i32 }
214  catch i8* null
215  br label %bb13
216
217bb13:                                             ; preds = %bb9
  ; Calls @__cxa_begin_catch, then rejoins the other side at %bb23.
218  %t15 = call i8* @__cxa_begin_catch(i8* null)
219  br label %bb23
220
221bb22:                                             ; preds = %bb
222  %t23 = bitcast %struct.s* %t to i8*
223  %s24 = bitcast %struct.s* %s to i8*
  ; Second copy: 8 bytes from %s into %t. The MemorySSA run expects the source
  ; operand to become @s_baz; the run without MemorySSA expects %s24 to remain
  ; the source.
224  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %t23, i8* align 4 %s24, i64 8, i1 false)
225  br label %bb23
226
227bb23:                                             ; preds = %bb22, %bb13
  ; Join point: load both i32 fields of %t and return their sum. %t is written
  ; only on the path through %bb22.
228  %t17 = getelementptr inbounds %struct.s, %struct.s* %t, i32 0, i32 0
229  %t18 = load i32, i32* %t17, align 4
230  %t19 = getelementptr inbounds %struct.s, %struct.s* %t, i32 0, i32 1
231  %t20 = load i32, i32* %t19, align 4
232  %t21 = add nsw i32 %t18, %t20
233  ret i32 %t21
234
235bb25:                                             ; preds = %bb6
  ; Normal destination of the invoke in %bb6.
236  unreachable
237}
238