1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -dse -enable-dse-partial-store-merging=false < %s | FileCheck --check-prefixes=CHECK %s
; Module-level target description, followed by the struct type used by the
; first test function in this file.
; NOTE(review): in the LLVM data layout syntax a leading "E" selects big-endian,
; while the triple below is named powerpc64le - confirm whether this combination
; is intentional for this test.
3target datalayout = "E-m:e-i64:64-n32:64"
4target triple = "powerpc64le-unknown-linux"
5
; A struct whose only field is a nested pair of floats.
6%"struct.std::complex" = type { { float, float } }
7
; Whole-object store followed by stores to every field.
; The input calls @_Z3barSt7complexIfE into a stack temporary, copies that
; temporary into the noalias sret result with a single i64 store, and then
; stores a float to each of the two fields of the same result object.
; The expected output below no longer contains the i64 store (nor the bitcast
; of %agg.result that fed it); the load of the temporary and both float stores
; remain.
8define void @_Z4testSt7complexIfE(%"struct.std::complex"* noalias nocapture sret(%"struct.std::complex") %agg.result, i64 %c.coerce) {
9; CHECK-LABEL: @_Z4testSt7complexIfE(
10; CHECK-NEXT:  entry:
11; CHECK-NEXT:    [[REF_TMP:%.*]] = alloca i64, align 8
12; CHECK-NEXT:    [[TMPCAST:%.*]] = bitcast i64* [[REF_TMP]] to %"struct.std::complex"*
13; CHECK-NEXT:    [[C_SROA_0_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[C_COERCE:%.*]], 32
14; CHECK-NEXT:    [[C_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[C_SROA_0_0_EXTRACT_SHIFT]] to i32
15; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32 [[C_SROA_0_0_EXTRACT_TRUNC]] to float
16; CHECK-NEXT:    [[C_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[C_COERCE]] to i32
17; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 [[C_SROA_2_0_EXTRACT_TRUNC]] to float
18; CHECK-NEXT:    call void @_Z3barSt7complexIfE(%"struct.std::complex"* nonnull sret(%"struct.std::complex") [[TMPCAST]], i64 [[C_COERCE]])
19; CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[REF_TMP]], align 8
20; CHECK-NEXT:    [[_M_VALUE_REALP_I_I:%.*]] = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* [[AGG_RESULT:%.*]], i64 0, i32 0, i32 0
21; CHECK-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP2]], 32
22; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
23; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
24; CHECK-NEXT:    [[_M_VALUE_IMAGP_I_I:%.*]] = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* [[AGG_RESULT]], i64 0, i32 0, i32 1
25; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP2]] to i32
26; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float
27; CHECK-NEXT:    [[MUL_AD_I_I:%.*]] = fmul fast float [[TMP5]], [[TMP1]]
28; CHECK-NEXT:    [[MUL_BC_I_I:%.*]] = fmul fast float [[TMP7]], [[TMP0]]
29; CHECK-NEXT:    [[MUL_I_I_I:%.*]] = fadd fast float [[MUL_AD_I_I]], [[MUL_BC_I_I]]
30; CHECK-NEXT:    [[MUL_AC_I_I:%.*]] = fmul fast float [[TMP5]], [[TMP0]]
31; CHECK-NEXT:    [[MUL_BD_I_I:%.*]] = fmul fast float [[TMP7]], [[TMP1]]
32; CHECK-NEXT:    [[MUL_R_I_I:%.*]] = fsub fast float [[MUL_AC_I_I]], [[MUL_BD_I_I]]
33; CHECK-NEXT:    store float [[MUL_R_I_I]], float* [[_M_VALUE_REALP_I_I]], align 4
34; CHECK-NEXT:    store float [[MUL_I_I_I]], float* [[_M_VALUE_IMAGP_I_I]], align 4
35; CHECK-NEXT:    ret void
36;
37entry:
38
39  %ref.tmp = alloca i64, align 8
40  %tmpcast = bitcast i64* %ref.tmp to %"struct.std::complex"*
41  %c.sroa.0.0.extract.shift = lshr i64 %c.coerce, 32
42  %c.sroa.0.0.extract.trunc = trunc i64 %c.sroa.0.0.extract.shift to i32
43  %0 = bitcast i32 %c.sroa.0.0.extract.trunc to float
44  %c.sroa.2.0.extract.trunc = trunc i64 %c.coerce to i32
45  %1 = bitcast i32 %c.sroa.2.0.extract.trunc to float
46  call void @_Z3barSt7complexIfE(%"struct.std::complex"* nonnull sret(%"struct.std::complex") %tmpcast, i64 %c.coerce)
47  %2 = bitcast %"struct.std::complex"* %agg.result to i64*
48  %3 = load i64, i64* %ref.tmp, align 8
; The store on the next line is the one that is absent from the expected output.
49  store i64 %3, i64* %2, align 4
50
51  %_M_value.realp.i.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %agg.result, i64 0, i32 0, i32 0
52  %4 = lshr i64 %3, 32
53  %5 = trunc i64 %4 to i32
54  %6 = bitcast i32 %5 to float
55  %_M_value.imagp.i.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %agg.result, i64 0, i32 0, i32 1
56  %7 = trunc i64 %3 to i32
57  %8 = bitcast i32 %7 to float
58  %mul_ad.i.i = fmul fast float %6, %1
59  %mul_bc.i.i = fmul fast float %8, %0
60  %mul_i.i.i = fadd fast float %mul_ad.i.i, %mul_bc.i.i
61  %mul_ac.i.i = fmul fast float %6, %0
62  %mul_bd.i.i = fmul fast float %8, %1
63  %mul_r.i.i = fsub fast float %mul_ac.i.i, %mul_bd.i.i
; These two float stores write field 0 and field 1 of %agg.result.
64  store float %mul_r.i.i, float* %_M_value.realp.i.i, align 4
65  store float %mul_i.i.i, float* %_M_value.imagp.i.i, align 4
66  ret void
67}
68
; External callee used by @_Z4testSt7complexIfE: takes an sret pointer to a
; %"struct.std::complex" plus an i64 argument and returns void.
69declare void @_Z3barSt7complexIfE(%"struct.std::complex"* sret(%"struct.std::complex"), i64)
70
; test1: a 4-byte store to %ptr followed by narrower stores into the same
; four bytes, in this order:
;   i8  7      at byte 0
;   i16 -30062 at bytes 0-1
;   i8  25     at byte 2
;   i8  47     at byte 3
;   i16 2020   at bytes 1-2 (align 1)
; The expected output keeps only the i16 -30062, i8 47 and i16 2020 stores;
; the i32 5, i8 7 and i8 25 stores, and the %bptr2 address computation, are
; absent.
71define void @test1(i32 *%ptr) {
72; CHECK-LABEL: @test1(
73; CHECK-NEXT:  entry:
74; CHECK-NEXT:    [[BPTR:%.*]] = bitcast i32* [[PTR:%.*]] to i8*
75; CHECK-NEXT:    [[WPTR:%.*]] = bitcast i32* [[PTR]] to i16*
76; CHECK-NEXT:    store i16 -30062, i16* [[WPTR]], align 2
77; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3
78; CHECK-NEXT:    store i8 47, i8* [[BPTR3]], align 1
79; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 1
80; CHECK-NEXT:    [[WPTRP:%.*]] = bitcast i8* [[BPTR1]] to i16*
81; CHECK-NEXT:    store i16 2020, i16* [[WPTRP]], align 1
82; CHECK-NEXT:    ret void
83;
84entry:
85
86  store i32 5, i32* %ptr
87  %bptr = bitcast i32* %ptr to i8*
88  store i8 7, i8* %bptr
89  %wptr = bitcast i32* %ptr to i16*
90  store i16 -30062, i16* %wptr
91  %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
92  store i8 25, i8* %bptr2
93  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
94  store i8 47, i8* %bptr3
95  %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
96  %wptrp = bitcast i8* %bptr1 to i16*
97  store i16 2020, i16* %wptrp, align 1
98  ret void
99
100
101}
102
; test2: a 4-byte store of 5 to %ptr followed by five align-1 i16 stores at
; byte offsets -1, 0, 1, 2 and 3 from %ptr. No load sits between the i32 store
; and the i16 stores.
; The expected output has no i32 store; all five i16 stores remain in their
; original order.
103define void @test2(i32 *%ptr) {
104; CHECK-LABEL: @test2(
105; CHECK-NEXT:  entry:
106; CHECK-NEXT:    [[BPTR:%.*]] = bitcast i32* [[PTR:%.*]] to i8*
107; CHECK-NEXT:    [[BPTRM1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 -1
108; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 1
109; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 2
110; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3
111; CHECK-NEXT:    [[WPTR:%.*]] = bitcast i8* [[BPTR]] to i16*
112; CHECK-NEXT:    [[WPTRM1:%.*]] = bitcast i8* [[BPTRM1]] to i16*
113; CHECK-NEXT:    [[WPTR1:%.*]] = bitcast i8* [[BPTR1]] to i16*
114; CHECK-NEXT:    [[WPTR2:%.*]] = bitcast i8* [[BPTR2]] to i16*
115; CHECK-NEXT:    [[WPTR3:%.*]] = bitcast i8* [[BPTR3]] to i16*
116; CHECK-NEXT:    store i16 1456, i16* [[WPTRM1]], align 1
117; CHECK-NEXT:    store i16 1346, i16* [[WPTR]], align 1
118; CHECK-NEXT:    store i16 1756, i16* [[WPTR1]], align 1
119; CHECK-NEXT:    store i16 1126, i16* [[WPTR2]], align 1
120; CHECK-NEXT:    store i16 5656, i16* [[WPTR3]], align 1
121; CHECK-NEXT:    ret void
122;
123entry:
124
125  store i32 5, i32* %ptr
126
127  %bptr = bitcast i32* %ptr to i8*
128  %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1
129  %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
130  %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
131  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
132
133  %wptr = bitcast i8* %bptr to i16*
134  %wptrm1 = bitcast i8* %bptrm1 to i16*
135  %wptr1 = bitcast i8* %bptr1 to i16*
136  %wptr2 = bitcast i8* %bptr2 to i16*
137  %wptr3 = bitcast i8* %bptr3 to i16*
138
139  store i16 1456, i16* %wptrm1, align 1
140  store i16 1346, i16* %wptr, align 1
141  store i16 1756, i16* %wptr1, align 1
142  store i16 1126, i16* %wptr2, align 1
143  store i16 5656, i16* %wptr3, align 1
144
145
146
147  ret void
148
149}
150
; test3: the same store pattern as a 4-byte store of 5 followed by five
; align-1 i16 stores at byte offsets -1..3, but with an i8 load of byte 0
; placed after the i32 store and before the i16 stores. The loaded value is
; returned.
; The expected output keeps the i32 store (it is the first expected
; instruction) together with the load and all five i16 stores.
151define signext i8 @test3(i32 *%ptr) {
152; CHECK-LABEL: @test3(
153; CHECK-NEXT:  entry:
154; CHECK-NEXT:    store i32 5, i32* [[PTR:%.*]], align 4
155; CHECK-NEXT:    [[BPTR:%.*]] = bitcast i32* [[PTR]] to i8*
156; CHECK-NEXT:    [[BPTRM1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 -1
157; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 1
158; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 2
159; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3
160; CHECK-NEXT:    [[WPTR:%.*]] = bitcast i8* [[BPTR]] to i16*
161; CHECK-NEXT:    [[WPTRM1:%.*]] = bitcast i8* [[BPTRM1]] to i16*
162; CHECK-NEXT:    [[WPTR1:%.*]] = bitcast i8* [[BPTR1]] to i16*
163; CHECK-NEXT:    [[WPTR2:%.*]] = bitcast i8* [[BPTR2]] to i16*
164; CHECK-NEXT:    [[WPTR3:%.*]] = bitcast i8* [[BPTR3]] to i16*
165; CHECK-NEXT:    [[V:%.*]] = load i8, i8* [[BPTR]], align 1
166; CHECK-NEXT:    store i16 1456, i16* [[WPTRM1]], align 1
167; CHECK-NEXT:    store i16 1346, i16* [[WPTR]], align 1
168; CHECK-NEXT:    store i16 1756, i16* [[WPTR1]], align 1
169; CHECK-NEXT:    store i16 1126, i16* [[WPTR2]], align 1
170; CHECK-NEXT:    store i16 5656, i16* [[WPTR3]], align 1
171; CHECK-NEXT:    ret i8 [[V]]
172;
173entry:
174
175  store i32 5, i32* %ptr
176
177  %bptr = bitcast i32* %ptr to i8*
178  %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1
179  %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
180  %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
181  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
182
183  %wptr = bitcast i8* %bptr to i16*
184  %wptrm1 = bitcast i8* %bptrm1 to i16*
185  %wptr1 = bitcast i8* %bptr1 to i16*
186  %wptr2 = bitcast i8* %bptr2 to i16*
187  %wptr3 = bitcast i8* %bptr3 to i16*
188
; This load reads a byte written by the i32 store above.
189  %v = load i8, i8* %bptr, align 1
190  store i16 1456, i16* %wptrm1, align 1
191  store i16 1346, i16* %wptr, align 1
192  store i16 1756, i16* %wptr1, align 1
193  store i16 1126, i16* %wptr2, align 1
194  store i16 5656, i16* %wptr3, align 1
195
196
197  ret i8 %v
198
199}
200
; %struct.foostruct: four pointers to functions of type
; i32 (i8*, i8**, i32, i8, i8*) followed by one pointer to a function of type
; void (i8*, i32, i32).
201%struct.foostruct = type {
202i32 (i8*, i8**, i32, i8, i8*)*,
203i32 (i8*, i8**, i32, i8, i8*)*,
204i32 (i8*, i8**, i32, i8, i8*)*,
205i32 (i8*, i8**, i32, i8, i8*)*,
206void (i8*, i32, i32)*
207}
; External symbols referenced by @test4: the memset intrinsic, the consumer of
; the initialized struct, and the function whose address is stored into the
; first four fields.
208declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
209declare void @goFunc(%struct.foostruct*)
210declare i32 @fa(i8*, i8**, i32, i8, i8*)
211
212; We miss this case, because of an aggressive limit of partial overlap analysis.
213; With a larger partial store limit, we remove the memset.
; test4: a 40-byte zeroing memset of a stack %struct.foostruct, followed by a
; pointer store to each of its five fields and a call to @goFunc on the struct.
; NOTE(review): the autogenerated expectations below go straight from the
; alloca to the first field store, i.e. they contain neither the memset call
; nor the %v1 bitcast feeding it. That contradicts the "we miss this case"
; wording above, which looks stale for the flags on this file's run line -
; confirm and reword.
214define void @test4()  {
215; CHECK-LABEL: @test4(
216; CHECK-NEXT:  entry:
217; CHECK-NEXT:    [[BANG:%.*]] = alloca [[STRUCT_FOOSTRUCT:%.*]], align 8
218; CHECK-NEXT:    [[V2:%.*]] = getelementptr inbounds [[STRUCT_FOOSTRUCT]], %struct.foostruct* [[BANG]], i64 0, i32 0
219; CHECK-NEXT:    store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** [[V2]], align 8
220; CHECK-NEXT:    [[V3:%.*]] = getelementptr inbounds [[STRUCT_FOOSTRUCT]], %struct.foostruct* [[BANG]], i64 0, i32 1
221; CHECK-NEXT:    store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** [[V3]], align 8
222; CHECK-NEXT:    [[V4:%.*]] = getelementptr inbounds [[STRUCT_FOOSTRUCT]], %struct.foostruct* [[BANG]], i64 0, i32 2
223; CHECK-NEXT:    store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** [[V4]], align 8
224; CHECK-NEXT:    [[V5:%.*]] = getelementptr inbounds [[STRUCT_FOOSTRUCT]], %struct.foostruct* [[BANG]], i64 0, i32 3
225; CHECK-NEXT:    store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** [[V5]], align 8
226; CHECK-NEXT:    [[V6:%.*]] = getelementptr inbounds [[STRUCT_FOOSTRUCT]], %struct.foostruct* [[BANG]], i64 0, i32 4
227; CHECK-NEXT:    store void (i8*, i32, i32)* null, void (i8*, i32, i32)** [[V6]], align 8
228; CHECK-NEXT:    call void @goFunc(%struct.foostruct* [[BANG]])
229; CHECK-NEXT:    ret void
230entry:
231
232  %bang = alloca %struct.foostruct, align 8
233  %v1 = bitcast %struct.foostruct* %bang to i8*
234  call void @llvm.memset.p0i8.i64(i8* align 8 %v1, i8 0, i64 40, i1 false)
235  %v2 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 0
236  store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v2, align 8
237  %v3 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 1
238  store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v3, align 8
239  %v4 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 2
240  store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v4, align 8
241  %v5 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 3
242  store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v5, align 8
243  %v6 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 4
244  store void (i8*, i32, i32)* null, void (i8*, i32, i32)** %v6, align 8
245  call void @goFunc(%struct.foostruct* %bang)
246  ret void
247
248}
249
; test5: a 4-byte store of 0 to %ptr followed by three align-1 i16 stores at
; byte offsets 2, 1 and 0 (in that order).
; The expected output has no i32 store and keeps all three i16 stores; the
; constant 65535 appears there as -1. %bptr3 is not used by any instruction in
; the input, yet its address computation is still present in the expected
; output.
250define signext i8 @test5(i32 *%ptr) {
251; CHECK-LABEL: @test5(
252; CHECK-NEXT:  entry:
253; CHECK-NEXT:    [[BPTR:%.*]] = bitcast i32* [[PTR:%.*]] to i8*
254; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 1
255; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 2
256; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3
257; CHECK-NEXT:    [[WPTR:%.*]] = bitcast i8* [[BPTR]] to i16*
258; CHECK-NEXT:    [[WPTR1:%.*]] = bitcast i8* [[BPTR1]] to i16*
259; CHECK-NEXT:    [[WPTR2:%.*]] = bitcast i8* [[BPTR2]] to i16*
260; CHECK-NEXT:    store i16 -1, i16* [[WPTR2]], align 1
261; CHECK-NEXT:    store i16 1456, i16* [[WPTR1]], align 1
262; CHECK-NEXT:    store i16 1346, i16* [[WPTR]], align 1
263; CHECK-NEXT:    ret i8 0
264;
265entry:
266
267  store i32 0, i32* %ptr
268
269  %bptr = bitcast i32* %ptr to i8*
270  %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
271  %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
272  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
273
274  %wptr = bitcast i8* %bptr to i16*
275  %wptr1 = bitcast i8* %bptr1 to i16*
276  %wptr2 = bitcast i8* %bptr2 to i16*
277
278  store i16 65535, i16* %wptr2, align 1
279  store i16 1456, i16* %wptr1, align 1
280  store i16 1346, i16* %wptr, align 1
281
282
283  ret i8 0
284}
285
; test6: a 4-byte store of 0 to %ptr followed by two align-1 i16 stores, first
; to i16 element 1 and then to i16 element 0 of the same location.
; The expected output has no i32 store and keeps both i16 stores; the constant
; 65535 appears there as -1.
286define signext i8 @test6(i32 *%ptr) {
287; CHECK-LABEL: @test6(
288; CHECK-NEXT:  entry:
289; CHECK-NEXT:    [[BPTR:%.*]] = bitcast i32* [[PTR:%.*]] to i16*
290; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i16, i16* [[BPTR]], i64 0
291; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i16, i16* [[BPTR]], i64 1
292; CHECK-NEXT:    store i16 1456, i16* [[BPTR2]], align 1
293; CHECK-NEXT:    store i16 -1, i16* [[BPTR1]], align 1
294; CHECK-NEXT:    ret i8 0
295;
296entry:
297
298  store i32 0, i32* %ptr
299
300  %bptr = bitcast i32* %ptr to i16*
301  %bptr1 = getelementptr inbounds i16, i16* %bptr, i64 0
302  %bptr2 = getelementptr inbounds i16, i16* %bptr, i64 1
303
304  store i16 1456, i16* %bptr2, align 1
305  store i16 65535, i16* %bptr1, align 1
306
307
308  ret i8 0
309}
310
; test7: an 8-byte store of 0 to %ptr followed by four align-1 i16 stores to
; i16 elements 0, 2, 1 and 3 (in that order) of the same location.
; The expected output has no i64 store and keeps the four i16 stores in their
; original order.
311define signext i8 @test7(i64 *%ptr) {
312; CHECK-LABEL: @test7(
313; CHECK-NEXT:  entry:
314; CHECK-NEXT:    [[BPTR:%.*]] = bitcast i64* [[PTR:%.*]] to i16*
315; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i16, i16* [[BPTR]], i64 0
316; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i16, i16* [[BPTR]], i64 1
317; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i16, i16* [[BPTR]], i64 2
318; CHECK-NEXT:    [[BPTR4:%.*]] = getelementptr inbounds i16, i16* [[BPTR]], i64 3
319; CHECK-NEXT:    store i16 1346, i16* [[BPTR1]], align 1
320; CHECK-NEXT:    store i16 1756, i16* [[BPTR3]], align 1
321; CHECK-NEXT:    store i16 1456, i16* [[BPTR2]], align 1
322; CHECK-NEXT:    store i16 5656, i16* [[BPTR4]], align 1
323; CHECK-NEXT:    ret i8 0
324;
325entry:
326
327  store i64 0, i64* %ptr
328
329  %bptr = bitcast i64* %ptr to i16*
330  %bptr1 = getelementptr inbounds i16, i16* %bptr, i64 0
331  %bptr2 = getelementptr inbounds i16, i16* %bptr, i64 1
332  %bptr3 = getelementptr inbounds i16, i16* %bptr, i64 2
333  %bptr4 = getelementptr inbounds i16, i16* %bptr, i64 3
334
335  store i16 1346, i16* %bptr1, align 1
336  store i16 1756, i16* %bptr3, align 1
337  store i16 1456, i16* %bptr2, align 1
338  store i16 5656, i16* %bptr4, align 1
339
340
341  ret i8 0
342}
343