; RUN: opt < %s -jump-threading -S | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes=jump-threading -S | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"

; Test that we can thread through the block with the partially redundant load (%2).
; rdar://6402033
define i32 @test1(i32* %P) nounwind {
; CHECK-LABEL: @test1(
entry:
	%0 = tail call i32 (...) @f1() nounwind		; <i32> [#uses=1]
	%1 = icmp eq i32 %0, 0		; <i1> [#uses=1]
	br i1 %1, label %bb1, label %bb

bb:		; preds = %entry
; CHECK: bb1.thread:
; CHECK: store
; CHECK: br label %bb3
	store i32 42, i32* %P, align 4
	br label %bb1

bb1:		; preds = %entry, %bb
	%res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]		; <i32> [#uses=2]
	%2 = load i32, i32* %P, align 4		; <i32> [#uses=1]
	%3 = icmp sgt i32 %2, 36		; <i1> [#uses=1]
	br i1 %3, label %bb3, label %bb2

bb2:		; preds = %bb1
	%4 = tail call i32 (...) @f2() nounwind		; <i32> [#uses=0]
	ret i32 %res.0

bb3:		; preds = %bb1
; CHECK: bb3:
; CHECK: %res.02 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ]
; CHECK: ret i32 %res.02
	ret i32 %res.0
}

declare i32 @f1(...)

declare i32 @f2(...)


;; Check that we preserve TBAA information.
; rdar://11039258

define i32 @test2(i32* %P) nounwind {
; CHECK-LABEL: @test2(
entry:
	%0 = tail call i32 (...) @f1() nounwind		; <i32> [#uses=1]
	%1 = icmp eq i32 %0, 0		; <i1> [#uses=1]
	br i1 %1, label %bb1, label %bb

bb:		; preds = %entry
; CHECK: bb1.thread:
; CHECK: store{{.*}}, !tbaa !0
; CHECK: br label %bb3
	store i32 42, i32* %P, align 4, !tbaa !0
	br label %bb1

bb1:		; preds = %entry, %bb
	%res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
	%2 = load i32, i32* %P, align 4, !tbaa !0
	%3 = icmp sgt i32 %2, 36
	br i1 %3, label %bb3, label %bb2

bb2:		; preds = %bb1
	%4 = tail call i32 (...) @f2() nounwind
	ret i32 %res.0

bb3:		; preds = %bb1
; CHECK: bb3:
; CHECK: %res.02 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ]
; CHECK: ret i32 %res.02
	ret i32 %res.0
}

define i32 @test3(i8** %x, i1 %f) {
; Correctly thread loads of different (but compatible) types, placing bitcasts
; as necessary in the predecessors. This is especially tricky because the same
; predecessor ends up with two entries in the PHI node and they must share
; a single cast.
; CHECK-LABEL: @test3(
entry:
  %0 = bitcast i8** %x to i32**
  %1 = load i32*, i32** %0, align 8
  br i1 %f, label %if.end57, label %if.then56
; CHECK: %[[LOAD:.*]] = load i32*, i32**
; CHECK: %[[CAST:.*]] = bitcast i32* %[[LOAD]] to i8*

if.then56:
  br label %if.end57

if.end57:
  %2 = load i8*, i8** %x, align 8
  %tobool59 = icmp eq i8* %2, null
  br i1 %tobool59, label %return, label %if.then60
; CHECK: %[[PHI:.*]] = phi i8* [ %[[CAST]], %[[PRED:[^ ]+]] ], [ %[[CAST]], %[[PRED]] ]
; CHECK-NEXT: %[[CMP:.*]] = icmp eq i8* %[[PHI]], null
; CHECK-NEXT: br i1 %[[CMP]]

if.then60:
  ret i32 42

return:
  ret i32 13
}

define i32 @test4(i32* %P) {
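; Same as @test1, but the load and store are unordered atomics. Unordered
; accesses are still safe to duplicate, so the %bb path is threaded through
; %bb1 to %bb3 just as in @test1.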
; CHECK-LABEL: @test4(
entry:
  %v0 = tail call i32 (...) @f1()
  %v1 = icmp eq i32 %v0, 0
  br i1 %v1, label %bb1, label %bb

bb:
; CHECK: bb1.thread:
; CHECK: store atomic
; CHECK: br label %bb3
  store atomic i32 42, i32* %P unordered, align 4
  br label %bb1

bb1:
; CHECK: bb1:
; CHECK-NOT: phi
; CHECK: load atomic
  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
  %v2 = load atomic i32, i32* %P unordered, align 4
  %v3 = icmp sgt i32 %v2, 36
  br i1 %v3, label %bb3, label %bb2

bb2:
  %v4 = tail call i32 (...) @f2()
  ret i32 %res.0

bb3:
  ret i32 %res.0
}

define i32 @test5(i32* %P) {
; Negative test
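; The store is a release and the load is an acquire; load threading does not
; touch ordered atomics, so the blocks must be left exactly as written.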

; CHECK-LABEL: @test5(
entry:
  %v0 = tail call i32 (...) @f1()
  %v1 = icmp eq i32 %v0, 0
  br i1 %v1, label %bb1, label %bb

bb:
; CHECK: bb:
; CHECK-NEXT:   store atomic i32 42, i32* %P release, align 4
; CHECK-NEXT:   br label %bb1
  store atomic i32 42, i32* %P release, align 4
  br label %bb1

bb1:
; CHECK: bb1:
; CHECK-NEXT:  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
; CHECK-NEXT:  %v2 = load atomic i32, i32* %P acquire, align 4
; CHECK-NEXT:  %v3 = icmp sgt i32 %v2, 36
; CHECK-NEXT:  br i1 %v3, label %bb3, label %bb2

  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
  %v2 = load atomic i32, i32* %P acquire, align 4
  %v3 = icmp sgt i32 %v2, 36
  br i1 %v3, label %bb3, label %bb2

bb2:
  %v4 = tail call i32 (...) @f2()
  ret i32 %res.0

bb3:
  ret i32 %res.0
}

define i32 @test6(i32* %P) {
; Negative test
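; Like @test5, but with a plain store: the acquire load by itself is enough to
; block threading, so the blocks must be left exactly as written.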

; CHECK-LABEL: @test6(
entry:
  %v0 = tail call i32 (...) @f1()
  %v1 = icmp eq i32 %v0, 0
  br i1 %v1, label %bb1, label %bb

bb:
; CHECK: bb:
; CHECK-NEXT:   store i32 42, i32* %P
; CHECK-NEXT:   br label %bb1
  store i32 42, i32* %P
  br label %bb1

bb1:
; CHECK: bb1:
; CHECK-NEXT:  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
; CHECK-NEXT:  %v2 = load atomic i32, i32* %P acquire, align 4
; CHECK-NEXT:  %v3 = icmp sgt i32 %v2, 36
; CHECK-NEXT:  br i1 %v3, label %bb3, label %bb2

  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
  %v2 = load atomic i32, i32* %P acquire, align 4
  %v3 = icmp sgt i32 %v2, 36
  br i1 %v3, label %bb3, label %bb2

bb2:
  %v4 = tail call i32 (...) @f2()
  ret i32 %res.0

bb3:
  ret i32 %res.0
}

define i32 @test7(i32* %P) {
; Negative test
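; Here the predecessor only loads %P, but the acquire load in %bb1 still blocks
; threading, so the blocks must be left exactly as written.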

; CHECK-LABEL: @test7(
entry:
  %v0 = tail call i32 (...) @f1()
  %v1 = icmp eq i32 %v0, 0
  br i1 %v1, label %bb1, label %bb

bb:
; CHECK: bb:
; CHECK-NEXT:   %val = load i32, i32* %P
; CHECK-NEXT:   br label %bb1
  %val = load i32, i32* %P
  br label %bb1

bb1:
; CHECK: bb1:
; CHECK-NEXT:  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
; CHECK-NEXT:  %v2 = load atomic i32, i32* %P acquire, align 4
; CHECK-NEXT:  %v3 = icmp sgt i32 %v2, 36
; CHECK-NEXT:  br i1 %v3, label %bb3, label %bb2

  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
  %v2 = load atomic i32, i32* %P acquire, align 4
  %v3 = icmp sgt i32 %v2, 36
  br i1 %v3, label %bb3, label %bb2

bb2:
  %v4 = tail call i32 (...) @f2()
  ret i32 %res.0

bb3:
  ret i32 %res.0
}

; Make sure we merge the aliasing metadata. We keep the range metadata for the
; first load, as it dominates the second load. Hence we can eliminate the
; branch.
define void @test8(i32*, i32*, i32*) {
; CHECK-LABEL: @test8(
; CHECK: %a = load i32, i32* %0, !range ![[RANGE4:[0-9]+]]
; CHECK-NEXT: store i32 %a
; CHECK-NEXT: %xxx = tail call i32 (...) @f1()
; CHECK-NEXT: ret void
  %a = load i32, i32* %0, !tbaa !0, !range !4, !alias.scope !9, !noalias !10
  %b = load i32, i32* %0, !range !5
  store i32 %a, i32* %1
  %c = icmp eq i32 %b, 8
  br i1 %c, label %ret1, label %ret2

ret1:
  ret void

ret2:
  %xxx = tail call i32 (...) @f1() nounwind
  ret void
}

; Make sure we merge/PRE aliasing metadata correctly.  That means that
; we need to remove metadata from the existing load, and add appropriate
; metadata to the newly inserted load.
define void @test9(i32*, i32*, i32*, i1 %c) {
; CHECK-LABEL: @test9(
  br i1 %c, label %d1, label %d2

; CHECK: d1:
; CHECK-NEXT: %a = load i32, i32* %0{{$}}
d1:
  %a = load i32, i32* %0, !range !4, !alias.scope !9, !noalias !10
  br label %d3

; CHECK: d2:
; CHECK-NEXT: %xxxx = tail call i32 (...) @f1()
; CHECK-NEXT: %b.pr = load i32, i32* %0, !tbaa !0{{$}}
d2:
  %xxxx = tail call i32 (...) @f1() nounwind
  br label %d3

d3:
  %p = phi i32 [ 1, %d2 ], [ %a, %d1 ]
  %b = load i32, i32* %0, !tbaa !0
  store i32 %p, i32* %1
  %c2 = icmp eq i32 %b, 8
  br i1 %c2, label %ret1, label %ret2

ret1:
  ret void

ret2:
  %xxx = tail call i32 (...) @f1() nounwind
  ret void
}

define i32 @fn_noalias(i1 %c2, i64* noalias %P, i64* noalias %P2) {
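; %P and %P2 are noalias, so the store to %P2 in %cond1 does not clobber %l1.
; The load in %cond2 is therefore redundant along the edge from %cond1; that
; edge is threaded and %cond3 selects the right value with a phi of both loads.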
; CHECK-LABEL: @fn_noalias
; CHECK-LABEL: cond1:
; CHECK: %[[LD1:.*]] = load i64, i64* %P
; CHECK: br i1 %c, label %[[THREAD:.*]], label %end
; CHECK-LABEL: cond2:
; CHECK: %[[LD2:.*]] = load i64, i64* %P
; CHECK-LABEL: cond3:
; CHECK: %[[PHI:.*]] = phi i64 [ %[[LD1]], %[[THREAD]] ], [ %[[LD2]], %cond2 ]
; CHECK: call void @fn3(i64 %[[PHI]])
entry:
  br i1 %c2, label %cond2, label %cond1

cond1:
  %l1 = load i64, i64* %P
  store i64 42, i64* %P2
  %c = icmp eq i64 %l1, 0
  br i1 %c, label %cond2, label %end

cond2:
  %l2 = load i64, i64* %P
  call void @fn2(i64 %l2)
  %c3 = icmp eq i64 %l2, 0
  br i1 %c3, label %cond3, label %end

cond3:
  call void @fn3(i64 %l2)
  br label %end

end:
  ret i32 0
}

; This tests whether we can thread from %sw.bb.i to %do.body.preheader.i67
; through %sw.bb21.i. To make this happen, %l2 should be detected as a
; partially redundant load with %l3 across the store to %phase in %sw.bb21.i.

%struct.NEXT_MOVE = type { i32, i32, i32* }
@hash_move = unnamed_addr global [65 x i32] zeroinitializer, align 4
@current_move = internal global [65 x i32] zeroinitializer, align 4
@last = internal unnamed_addr global [65 x i32*] zeroinitializer, align 8
@next_status = internal unnamed_addr global [65 x %struct.NEXT_MOVE] zeroinitializer, align 8
define fastcc i32 @Search(i64 %idxprom.i, i64 %idxprom.i89, i32 %c) {
; CHECK-LABEL: @Search
; CHECK-LABEL: sw.bb.i:
; CHECK: %[[LD1:.*]] = load i32, i32* %arrayidx185, align 4
; CHECK: %[[C1:.*]] = icmp eq i32 %[[LD1]], 0
; CHECK: br i1 %[[C1]], label %sw.bb21.i.thread, label %if.then.i64
; CHECK-LABEL: sw.bb21.i.thread:
; CHECK: br label %[[THREAD_TO:.*]]
; CHECK-LABEL: sw.bb21.i:
; CHECK: %[[LD2:.*]] = load i32, i32* %arrayidx185, align 4
; CHECK: %[[C2:.*]] = icmp eq i32 %[[LD2]], 0
; CHECK: br i1 %[[C2]], label %[[THREAD_TO]], label %cleanup
entry:
  %arrayidx185 = getelementptr inbounds [65 x i32], [65 x i32]* @hash_move, i64 0, i64 %idxprom.i
  %arrayidx307 = getelementptr inbounds [65 x i32], [65 x i32]* @current_move, i64 0, i64 %idxprom.i
  %arrayidx89 = getelementptr inbounds [65 x i32*], [65 x i32*]* @last, i64 0, i64 %idxprom.i
  %phase = getelementptr inbounds [65 x %struct.NEXT_MOVE], [65 x %struct.NEXT_MOVE]* @next_status, i64 0, i64 %idxprom.i, i32 0
  br label %cond.true282

cond.true282:
  switch i32 %c, label %sw.default.i [
    i32 1, label %sw.bb.i
    i32 0, label %sw.bb21.i
  ]

sw.default.i:
  br label %cleanup

sw.bb.i:
  %call.i62 = call fastcc i32* @GenerateCheckEvasions()
  store i32* %call.i62, i32** %arrayidx89, align 8
  %l2 = load i32, i32* %arrayidx185, align 4
  %tobool.i63 = icmp eq i32 %l2, 0
  br i1 %tobool.i63, label %sw.bb21.i, label %if.then.i64

if.then.i64:                                      ; preds = %sw.bb.i
  store i32 7, i32* %phase, align 8
  store i32 %l2, i32* %arrayidx307, align 4
  %call16.i = call fastcc i32 @ValidMove(i32 %l2)
  %tobool17.i = icmp eq i32 %call16.i, 0
  br i1 %tobool17.i, label %if.else.i65, label %cleanup

if.else.i65:
  call void @f65()
  br label %sw.bb21.i

sw.bb21.i:
  store i32 10, i32* %phase, align 8
  %l3 = load i32, i32* %arrayidx185, align 4
  %tobool27.i = icmp eq i32 %l3, 0
  br i1 %tobool27.i, label %do.body.preheader.i67, label %cleanup

do.body.preheader.i67:
  call void @f67()
  ret i32 67

cleanup:
  call void @Cleanup()
  ret i32 0
}

declare fastcc i32* @GenerateCheckEvasions()
declare fastcc i32 @ValidMove(i32 %move)
declare void @f67()
declare void @Cleanup()
declare void @f65()

define i32 @fn_SinglePred(i1 %c2, i64* %P) {
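; The load in %cond2 is fully redundant with %l1 from %entry, so it is replaced
; by %l1 and the %entry -> %cond2 edge is threaded straight to %cond3, which
; then uses %l1 directly.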
; CHECK-LABEL: @fn_SinglePred
; CHECK-LABEL: entry:
; CHECK: %[[L1:.*]] = load i64, i64* %P
; CHECK: br i1 %c, label %cond3, label %cond1
; CHECK-LABEL: cond2:
; CHECK-NOT: load
; CHECK: %[[PHI:.*]] = phi i64 [ %[[L1]], %cond1 ]
; CHECK: call void @fn2(i64 %[[PHI]])
; CHECK: br label %end
; CHECK-LABEL: cond3:
; CHECK: call void @fn2(i64 %l1)
; CHECK: call void @fn3(i64 %l1)

entry:
  %l1 = load i64, i64* %P
  %c = icmp eq i64 %l1, 0
  br i1 %c, label %cond2, label %cond1

cond1:
  br i1 %c2, label %cond2, label %end

cond2:
  %l2 = load i64, i64* %P
  call void @fn2(i64 %l2)
  %c3 = icmp eq i64 %l2, 0
  br i1 %c3, label %cond3, label %end

cond3:
  call void @fn3(i64 %l2)
  br label %end

end:
  ret i32 0
}

define i32 @fn_SinglePredMultihop(i1 %c1, i1 %c2, i64* %P) {
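; Like @fn_SinglePred, but %l1 has to be tracked through the multi-hop path
; %entry -> %cond0 -> %cond1 before the load in %cond2 can be recognized as
; fully redundant.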
; CHECK-LABEL: @fn_SinglePredMultihop
; CHECK-LABEL: entry:
; CHECK: %[[L1:.*]] = load i64, i64* %P
; CHECK: br i1 %c0, label %cond3, label %cond0
; CHECK-LABEL: cond2:
; CHECK-NOT: load
; CHECK: %[[PHI:.*]] = phi i64 [ %[[L1]], %cond1 ]
; CHECK: call void @fn2(i64 %[[PHI]])
; CHECK: br label %end
; CHECK-LABEL: cond3:
; CHECK: call void @fn2(i64 %l1)
; CHECK: call void @fn3(i64 %l1)

entry:
  %l1 = load i64, i64* %P
  %c0 = icmp eq i64 %l1, 0
  br i1 %c0, label %cond2, label %cond0

cond0:
  br i1 %c1, label %cond1, label %end

cond1:
  br i1 %c2, label %cond2, label %end

cond2:
  %l2 = load i64, i64* %P
  call void @fn2(i64 %l2)
  %c3 = icmp eq i64 %l2, 0
  br i1 %c3, label %cond3, label %end

cond3:
  call void @fn3(i64 %l2)
  br label %end

end:
  ret i32 0
}

declare void @fn2(i64)
declare void @fn3(i64)


; Make sure we phi-translate and make the partially redundant load in
; %merge fully redundant, so that we can jump-thread the block with the
; store.
;
; CHECK-LABEL: define i32 @phi_translate_partial_redundant_loads(i32 %0, i32* %1, i32* %2
; CHECK: merge.thread:
; CHECK: store
; CHECK: br label %left_x
;
; CHECK: left_x:
; CHECK-NEXT: ret i32 20
define i32 @phi_translate_partial_redundant_loads(i32, i32*, i32*) {
  %cmp0 = icmp ne i32 %0, 0
  br i1 %cmp0, label %left, label %right

left:
  store i32 1, i32* %1, align 4
  br label %merge

right:
  br label %merge

merge:
  %phiptr = phi i32* [ %1, %left ], [ %2, %right ]
  %newload = load i32, i32* %phiptr, align 4
  %cmp1 = icmp slt i32 %newload, 5
  br i1 %cmp1, label %left_x, label %right_x

left_x:
  ret i32 20

right_x:
  ret i32 10
}

; CHECK: ![[RANGE4]] = !{i32 0, i32 1}

!0 = !{!3, !3, i64 0}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA"}
!3 = !{!"int", !1}
!4 = !{ i32 0, i32 1 }
!5 = !{ i32 8, i32 10 }
!6 = !{!6}
!7 = !{!7, !6}
!8 = !{!8, !6}
!9 = !{!7}
!10 = !{!8}