; RUN: opt -basic-aa -gvn -S < %s | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.0"

@x = common global i32 0, align 4
@y = common global i32 0, align 4

; GVN across unordered store (allowed)
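; (An unordered store is atomic but imposes no ordering on other
; addresses, and @x does not alias @y, so the second load of @y can
; reuse %x.)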
define i32 @test1() nounwind uwtable ssp {
; CHECK-LABEL: test1
; CHECK: add i32 %x, %x
entry:
  %x = load i32, i32* @y
  store atomic i32 %x, i32* @x unordered, align 4
  %y = load i32, i32* @y
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN across unordered load (allowed)
define i32 @test3() nounwind uwtable ssp {
; CHECK-LABEL: test3
; CHECK: add i32 %x, %x
entry:
  %x = load i32, i32* @y
  %y = load atomic i32, i32* @x unordered, align 4
  %z = load i32, i32* @y
  %a = add i32 %x, %z
  %b = add i32 %y, %a
  ret i32 %b
}

; GVN load to unordered load (allowed)
define i32 @test5() nounwind uwtable ssp {
; CHECK-LABEL: test5
; CHECK: add i32 %x, %x
entry:
  %x = load atomic i32, i32* @x unordered, align 4
  %y = load i32, i32* @x
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN unordered load to load (unordered load must not be removed)
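; (Replacing %x2 with %x would delete the unordered load and drop its
; atomicity: under a race a plain load yields undef, while an unordered
; load must return some value that was actually stored.)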
define i32 @test6() nounwind uwtable ssp {
; CHECK-LABEL: test6
; CHECK: load atomic i32, i32* @x unordered
entry:
  %x = load i32, i32* @x
  %x2 = load atomic i32, i32* @x unordered, align 4
  %x3 = add i32 %x, %x2
  ret i32 %x3
}

; GVN across release-acquire pair (forbidden)
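; (The release store and the acquire load can synchronize with another
; thread, which may have written @y in between, so the second load of @y
; cannot be assumed to still equal %x.)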
define i32 @test7() nounwind uwtable ssp {
; CHECK-LABEL: test7
; CHECK: add i32 %x, %y
entry:
  %x = load i32, i32* @y
  store atomic i32 %x, i32* @x release, align 4
  %w = load atomic i32, i32* @x acquire, align 4
  %y = load i32, i32* @y
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN across monotonic store (allowed)
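; (A monotonic access orders operations only on its own address, roughly
; C++ memory_order_relaxed, so the store to @x is not a barrier for @y.)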
define i32 @test9() nounwind uwtable ssp {
; CHECK-LABEL: test9
; CHECK: add i32 %x, %x
entry:
  %x = load i32, i32* @y
  store atomic i32 %x, i32* @x monotonic, align 4
  %y = load i32, i32* @y
  %z = add i32 %x, %y
  ret i32 %z
}

; GVN of an unordered load across a monotonic load (not allowed)
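; (The second load of @y is monotonic, i.e. stronger than unordered;
; forwarding the unordered %x to it would weaken its ordering, so both
; loads of @y must remain.)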
define i32 @test10() nounwind uwtable ssp {
; CHECK-LABEL: test10
; CHECK: add i32 %x, %y
entry:
  %x = load atomic i32, i32* @y unordered, align 4
  %clobber = load atomic i32, i32* @x monotonic, align 4
  %y = load atomic i32, i32* @y monotonic, align 4
  %z = add i32 %x, %y
  ret i32 %z
}

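; Regression test for PR22708: the acquire load of @x may synchronize with
; a release in another thread, so the store of 43 must not be forwarded to
; %load across it.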
define i32 @PR22708(i1 %flag) {
; CHECK-LABEL: PR22708
entry:
  br i1 %flag, label %if.then, label %if.end

if.then:
  store i32 43, i32* @y, align 4
; CHECK: store i32 43, i32* @y, align 4
  br label %if.end

if.end:
  load atomic i32, i32* @x acquire, align 4
  %load = load i32, i32* @y, align 4
; CHECK: load atomic i32, i32* @x acquire, align 4
; CHECK: load i32, i32* @y, align 4
  ret i32 %load
}

; CHECK-LABEL: @test12(
; Can't remove a load over an ordering barrier
define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
  %load0 = load i32, i32* %P1
  %1 = load atomic i32, i32* %P2 seq_cst, align 4
  %load1 = load i32, i32* %P1
  %sel = select i1 %B, i32 %load0, i32 %load1
  ret i32 %sel
  ; CHECK: load i32, i32* %P1
  ; CHECK: load i32, i32* %P1
}

; CHECK-LABEL: @test13(
; atomic to non-atomic forwarding is legal
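; (Reusing the seq_cst load's value for the later plain load only drops
; ordering the plain load never had, so %b folds away and 0 is returned.)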
define i32 @test13(i32* %P1) {
  %a = load atomic i32, i32* %P1 seq_cst, align 4
  %b = load i32, i32* %P1
  %res = sub i32 %a, %b
  ret i32 %res
  ; CHECK: load atomic i32, i32* %P1
  ; CHECK: ret i32 0
}

; CHECK-LABEL: @test13b(
define i32 @test13b(i32* %P1) {
  store atomic i32 0, i32* %P1 unordered, align 4
  %b = load i32, i32* %P1
  ret i32 %b
  ; CHECK: ret i32 0
}

; CHECK-LABEL: @test14(
; atomic to unordered atomic forwarding is legal
define i32 @test14(i32* %P1) {
  %a = load atomic i32, i32* %P1 seq_cst, align 4
  %b = load atomic i32, i32* %P1 unordered, align 4
  %res = sub i32 %a, %b
  ret i32 %res
  ; CHECK: load atomic i32, i32* %P1 seq_cst
  ; CHECK-NEXT: ret i32 0
}

; CHECK-LABEL: @test15(
; implementation restriction: can't forward to anything stronger
; than unordered
define i32 @test15(i32* %P1, i32* %P2) {
  %a = load atomic i32, i32* %P1 seq_cst, align 4
  %b = load atomic i32, i32* %P1 seq_cst, align 4
  %res = sub i32 %a, %b
  ret i32 %res
  ; CHECK: load atomic i32, i32* %P1
  ; CHECK: load atomic i32, i32* %P1
}

; CHECK-LABEL: @test16(
; forwarding non-atomic to atomic is wrong! (However,
; it would be legal to use the later value in place of the
; former in this particular example.  We just don't
; do that right now.)
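; (Under LLVM's memory model a racing non-atomic load yields undef, while
; an unordered load must return some stored value; replacing %b with %a
; could therefore introduce undef.)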
define i32 @test16(i32* %P1, i32* %P2) {
  %a = load i32, i32* %P1, align 4
  %b = load atomic i32, i32* %P1 unordered, align 4
  %res = sub i32 %a, %b
  ret i32 %res
  ; CHECK: load i32, i32* %P1
  ; CHECK: load atomic i32, i32* %P1
}

; CHECK-LABEL: @test16b(
define i32 @test16b(i32* %P1) {
  store i32 0, i32* %P1
  %b = load atomic i32, i32* %P1 unordered, align 4
  ret i32 %b
  ; CHECK: load atomic i32, i32* %P1
}

; Can't DSE across a seq_cst store (it acts as a full fence)
define void @fence_seq_cst_store(i32* %P1, i32* %P2) {
; CHECK-LABEL: @fence_seq_cst_store(
; CHECK: store
; CHECK: store atomic
; CHECK: store
  store i32 0, i32* %P1, align 4
  store atomic i32 0, i32* %P2 seq_cst, align 4
  store i32 0, i32* %P1, align 4
  ret void
}

; Can't DSE across a full fence
define void @fence_seq_cst(i32* %P1, i32* %P2) {
; CHECK-LABEL: @fence_seq_cst(
; CHECK: store
; CHECK: fence seq_cst
; CHECK: store
  store i32 0, i32* %P1, align 4
  fence seq_cst
  store i32 0, i32* %P1, align 4
  ret void
}

; Can't DSE across a full syncscope("singlethread") fence
define void @fence_seq_cst_st(i32* %P1, i32* %P2) {
; CHECK-LABEL: @fence_seq_cst_st(
; CHECK: store
; CHECK: fence syncscope("singlethread") seq_cst
; CHECK: store
  store i32 0, i32* %P1, align 4
  fence syncscope("singlethread") seq_cst
  store i32 0, i32* %P1, align 4
  ret void
}

; Can't DSE across an inline asm with side effects (it acts as a fence)
define void @fence_asm_sideeffect(i32* %P1, i32* %P2) {
; CHECK-LABEL: @fence_asm_sideeffect(
; CHECK: store
; CHECK: call void asm sideeffect
; CHECK: store
  store i32 0, i32* %P1, align 4
  call void asm sideeffect "", ""()
  store i32 0, i32* %P1, align 4
  ret void
}

; Can't DSE across an inline asm that clobbers memory (it acts as a fence)
define void @fence_asm_memory(i32* %P1, i32* %P2) {
; CHECK-LABEL: @fence_asm_memory(
; CHECK: store
; CHECK: call void asm
; CHECK: store
  store i32 0, i32* %P1, align 4
  call void asm "", "~{memory}"()
  store i32 0, i32* %P1, align 4
  ret void
}

; Can't remove a volatile load
define i32 @volatile_load(i32* %P1, i32* %P2) {
  %a = load i32, i32* %P1, align 4
  %b = load volatile i32, i32* %P1, align 4
  %res = sub i32 %a, %b
  ret i32 %res
  ; CHECK-LABEL: @volatile_load(
  ; CHECK: load i32, i32* %P1
  ; CHECK: load volatile i32, i32* %P1
}

; Can't remove redundant volatile loads
define i32 @redundant_volatile_load(i32* %P1, i32* %P2) {
  %a = load volatile i32, i32* %P1, align 4
  %b = load volatile i32, i32* %P1, align 4
  %res = sub i32 %a, %b
  ret i32 %res
  ; CHECK-LABEL: @redundant_volatile_load(
  ; CHECK: load volatile i32, i32* %P1
  ; CHECK: load volatile i32, i32* %P1
  ; CHECK: sub
}

; Can't DSE a volatile store
define void @volatile_store(i32* %P1, i32* %P2) {
; CHECK-LABEL: @volatile_store(
; CHECK: store volatile
; CHECK: store
  store volatile i32 0, i32* %P1, align 4
  store i32 3, i32* %P1, align 4
  ret void
}

; Can't DSE a redundant volatile store
define void @redundant_volatile_store(i32* %P1, i32* %P2) {
; CHECK-LABEL: @redundant_volatile_store(
; CHECK: store volatile
; CHECK: store volatile
  store volatile i32 0, i32* %P1, align 4
  store volatile i32 0, i32* %P1, align 4
  ret void
}

; Value forwarding from a volatile load is legal
define i32 @test20(i32* %P1, i32* %P2) {
  %a = load volatile i32, i32* %P1, align 4
  %b = load i32, i32* %P1, align 4
  %res = sub i32 %a, %b
  ret i32 %res
  ; CHECK-LABEL: @test20(
  ; CHECK: load volatile i32, i32* %P1
  ; CHECK: ret i32 0
}

; We're currently conservative about widening
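; (Satisfying the wider i64 load from the i32 load would require widening
; an atomic access to bytes it never read, which is not obviously legal,
; so both loads are kept.)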
define i64 @widen1(i32* %P1) {
  ; CHECK-LABEL: @widen1(
  ; CHECK: load atomic i32, i32* %P1
  ; CHECK: load atomic i64, i64* %p2
  %p2 = bitcast i32* %P1 to i64*
  %a = load atomic i32, i32* %P1 unordered, align 4
  %b = load atomic i64, i64* %p2 unordered, align 4
  %a64 = sext i32 %a to i64
  %res = sub i64 %a64, %b
  ret i64 %res
}

; narrowing does work
define i64 @narrow(i32* %P1) {
  ; CHECK-LABEL: @narrow(
  ; CHECK: load atomic i64, i64* %p2
  ; CHECK-NOT: load atomic i32, i32* %P1
  %p2 = bitcast i32* %P1 to i64*
  %a64 = load atomic i64, i64* %p2 unordered, align 4
  %b = load atomic i32, i32* %P1 unordered, align 4
  %b64 = sext i32 %b to i64
  %res = sub i64 %a64, %b64
  ret i64 %res
}

; Missed optimization: we don't yet optimize ordered loads
define i64 @narrow2(i32* %P1) {
  ; CHECK-LABEL: @narrow2(
  ; CHECK: load atomic i64, i64* %p2
  ; CHECK: load atomic i32, i32* %P1
  %p2 = bitcast i32* %P1 to i64*
  %a64 = load atomic i64, i64* %p2 acquire, align 4
  %b = load atomic i32, i32* %P1 acquire, align 4
  %b64 = sext i32 %b to i64
  %res = sub i64 %a64, %b64
  ret i64 %res
}

; Note: The cross-block FRE testing is deliberately light.  All of the
; tricky bits of legality are shared code with the block-local FRE above.
; These are here only to show that we haven't obviously broken anything.

; unordered atomic to unordered atomic
define i32 @non_local_fre(i32* %P1) {
; CHECK-LABEL: @non_local_fre(
; CHECK: load atomic i32, i32* %P1
; CHECK: ret i32 0
; CHECK: ret i32 0
  %a = load atomic i32, i32* %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  ret i32 %a
next:
  %b = load atomic i32, i32* %P1 unordered, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

; unordered atomic to non-atomic
define i32 @non_local_fre2(i32* %P1) {
; CHECK-LABEL: @non_local_fre2(
; CHECK: load atomic i32, i32* %P1
; CHECK: ret i32 0
; CHECK: ret i32 0
  %a = load atomic i32, i32* %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  ret i32 %a
next:
  %b = load i32, i32* %P1
  %res = sub i32 %a, %b
  ret i32 %res
}

; Can't forward ordered atomics.
define i32 @non_local_fre3(i32* %P1) {
; CHECK-LABEL: @non_local_fre3(
; CHECK: load atomic i32, i32* %P1 acquire
; CHECK: ret i32 0
; CHECK: load atomic i32, i32* %P1 acquire
; CHECK: ret i32 %res
  %a = load atomic i32, i32* %P1 acquire, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  ret i32 %a
next:
  %b = load atomic i32, i32* %P1 acquire, align 4
  %res = sub i32 %a, %b
  ret i32 %res
}

declare void @clobber()

; unordered atomic to unordered atomic
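; (In the PRE tests below, the call to @clobber blocks simple forwarding;
; where the transform is legal, GVN instead inserts a load in %early and
; merges the two values with a phi, as the CHECK lines show.)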
define i32 @non_local_pre(i32* %P1) {
; CHECK-LABEL: @non_local_pre(
; CHECK: load atomic i32, i32* %P1 unordered
; CHECK: load atomic i32, i32* %P1 unordered
; CHECK: %b = phi i32 [ %b.pre, %early ], [ %a, %0 ]
; CHECK: ret i32 %b
  %a = load atomic i32, i32* %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, i32* %P1 unordered, align 4
  ret i32 %b
}

; unordered atomic to non-atomic
define i32 @non_local_pre2(i32* %P1) {
; CHECK-LABEL: @non_local_pre2(
; CHECK: load atomic i32, i32* %P1 unordered
; CHECK: load i32, i32* %P1
; CHECK: %b = phi i32 [ %b.pre, %early ], [ %a, %0 ]
; CHECK: ret i32 %b
  %a = load atomic i32, i32* %P1 unordered, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load i32, i32* %P1
  ret i32 %b
}

; non-atomic to unordered atomic - can't forward!
define i32 @non_local_pre3(i32* %P1) {
; CHECK-LABEL: @non_local_pre3(
; CHECK: %a = load i32, i32* %P1
; CHECK: %b = load atomic i32, i32* %P1 unordered
; CHECK: ret i32 %b
  %a = load i32, i32* %P1
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, i32* %P1 unordered, align 4
  ret i32 %b
}

; ordered atomic to ordered atomic - can't forward
define i32 @non_local_pre4(i32* %P1) {
; CHECK-LABEL: @non_local_pre4(
; CHECK: %a = load atomic i32, i32* %P1 seq_cst
; CHECK: %b = load atomic i32, i32* %P1 seq_cst
; CHECK: ret i32 %b
  %a = load atomic i32, i32* %P1 seq_cst, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, i32* %P1 seq_cst, align 4
  ret i32 %b
}

; can't remove volatile on any path
define i32 @non_local_pre5(i32* %P1) {
; CHECK-LABEL: @non_local_pre5(
; CHECK: %a = load atomic i32, i32* %P1 seq_cst
; CHECK: %b = load volatile i32, i32* %P1
; CHECK: ret i32 %b
  %a = load atomic i32, i32* %P1 seq_cst, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load volatile i32, i32* %P1
  ret i32 %b
}

; ordered atomic to unordered atomic
define i32 @non_local_pre6(i32* %P1) {
; CHECK-LABEL: @non_local_pre6(
; CHECK: load atomic i32, i32* %P1 seq_cst
; CHECK: load atomic i32, i32* %P1 unordered
; CHECK: %b = phi i32 [ %b.pre, %early ], [ %a, %0 ]
; CHECK: ret i32 %b
  %a = load atomic i32, i32* %P1 seq_cst, align 4
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %early, label %next
early:
  call void @clobber()
  br label %next
next:
  %b = load atomic i32, i32* %P1 unordered, align 4
  ret i32 %b
}