1; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads=true -verify-machineinstrs  < %s | FileCheck %s
2
3; CHECK-LABEL: %bb22
4
5; Load from %arg has alias store in Loop
6
7; CHECK: flat_load_dword
8
9; #####################################################################
10
11; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i]
12; However, our analysis cannot detect this.
13
14; CHECK: flat_load_dword
15
16; #####################################################################
17
18; CHECK-LABEL: %bb11
19
20; Load from %arg in a Loop body has alias store
21
22; CHECK: flat_load_dword
23
24; CHECK-LABEL: %bb20
25
26; CHECK: flat_store_dword
27
; @cfg - loop with a conditional store, used to exercise global-load
; scalarization in the presence of possibly-aliasing stores.
;
; Parameters:
;   %arg  - global (addrspace 1) i32 array, only read by this kernel.
;   %arg1 - global (addrspace 1) i32 array, read and written.
;   %arg2 - i32 index / modulus.
; Neither pointer is marked noalias, so stores through %arg1 may alias
; loads through %arg.
;
; Shape of the control flow:
;   bb   : n = arg[arg2]; enter the loop only when n > 0.
;   bb11 : loop header; loads arg[i srem arg2] and compares it against 100.
;   bb20 : conditional store of 0 to arg1[i].
;   bb22 : loop latch; sum += arg[i] + arg1[i+1]; leaves when i+1 == n.
;   bb8  : stores the accumulated sum (0 if the loop was skipped) to
;          arg1[arg2] and returns.
;
; The block labels and their order are referenced by the FileCheck
; expectations at the top of the file; do not rename or reorder them.
define amdgpu_kernel void @cfg(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 {
bb:
  ; %tmp4 = arg[arg2] is the loop trip bound.
  %tmp = sext i32 %arg2 to i64
  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp
  %tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4, !tbaa !0
  %tmp5 = icmp sgt i32 %tmp4, 0
  br i1 %tmp5, label %bb6, label %bb8

bb6:                                              ; preds = %bb
  br label %bb11

bb7:                                              ; preds = %bb22
  br label %bb8

bb8:                                              ; preds = %bb7, %bb
  ; Final result: running sum from the loop, or 0 when the loop never ran.
  %tmp9 = phi i32 [ 0, %bb ], [ %tmp30, %bb7 ]
  %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp
  store i32 %tmp9, i32 addrspace(1)* %tmp10, align 4, !tbaa !0
  ret void

bb11:                                             ; preds = %bb22, %bb6
  ; %tmp12 = running sum, %tmp13 = induction variable i.
  %tmp12 = phi i32 [ %tmp30, %bb22 ], [ 0, %bb6 ]
  %tmp13 = phi i32 [ %tmp25, %bb22 ], [ 0, %bb6 ]
  %tmp14 = srem i32 %tmp13, %arg2
  %tmp15 = sext i32 %tmp14 to i64
  %tmp16 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp15
  ; Load from %arg inside the loop; bb20 stores through %arg1 in the same loop.
  %tmp17 = load i32, i32 addrspace(1)* %tmp16, align 4, !tbaa !0
  %tmp18 = icmp sgt i32 %tmp17, 100
  %tmp19 = sext i32 %tmp13 to i64
  br i1 %tmp18, label %bb20, label %bb22

bb20:                                             ; preds = %bb11
  ; arg1[i] = 0 - the store that the loads in this loop may alias.
  %tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp19
  store i32 0, i32 addrspace(1)* %tmp21, align 4, !tbaa !0
  br label %bb22

bb22:                                             ; preds = %bb20, %bb11
  ; Load arg[i].
  %tmp23 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp19
  %tmp24 = load i32, i32 addrspace(1)* %tmp23, align 4, !tbaa !0
  %tmp25 = add nuw nsw i32 %tmp13, 1
  %tmp26 = sext i32 %tmp25 to i64
  ; Load arg1[i+1]; the store in bb20 writes arg1[i].
  %tmp27 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp26
  %tmp28 = load i32, i32 addrspace(1)* %tmp27, align 4, !tbaa !0
  %tmp29 = add i32 %tmp24, %tmp12
  %tmp30 = add i32 %tmp29, %tmp28
  ; Leave the loop once i+1 reaches the bound loaded in the entry block.
  %tmp31 = icmp eq i32 %tmp25, %tmp4
  br i1 %tmp31, label %bb7, label %bb11
}
76
77; one more test to ensure that aliasing store after the load
78; is considered clobbering if load parent block is the same
79; as a loop header block.
80
81; CHECK-LABEL: %bb1
82
83; Load from %arg has alias store that is after the load
84; but is considered clobbering because of the loop.
85
86; CHECK: flat_load_dword
87
; @cfg_selfloop - single-block loop in which the store follows the load.
;
; Parameters:
;   %arg  - global (addrspace 1) i32 array, only read by this kernel.
;   %arg1 - global (addrspace 1) i32 array, written by this kernel.
;   %arg2 - i32 modulus applied to the induction variable.
; Neither pointer is marked noalias, so the store through %arg1 may alias
; the load through %arg.
;
; bb1 is both the loop header and the loop latch: for i = 0 .. 99 it performs
; arg1[i] = arg[i srem arg2]. Because of the back edge, the store of one
; iteration precedes the load of the next iteration.
;
; The label %bb1 is referenced by the FileCheck expectation above; do not
; rename it.
define amdgpu_kernel void @cfg_selfloop(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 {
bb:
  br label %bb1

bb2:                                              ; preds = %bb1
  ret void

bb1:                                              ; preds = %bb1, %bb
  ; %tmp13 = induction variable i.
  %tmp13 = phi i32 [ %tmp25, %bb1 ], [ 0, %bb ]
  %tmp14 = srem i32 %tmp13, %arg2
  %tmp15 = sext i32 %tmp14 to i64
  %tmp16 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp15
  ; Load arg[i srem arg2].
  %tmp17 = load i32, i32 addrspace(1)* %tmp16, align 4, !tbaa !0
  %tmp19 = sext i32 %tmp13 to i64
  %tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp19
  ; arg1[i] = loaded value; textually after the load, in the same block.
  store i32 %tmp17, i32 addrspace(1)* %tmp21, align 4, !tbaa !0
  %tmp25 = add nuw nsw i32 %tmp13, 1
  ; Leave the loop once i+1 reaches 100.
  %tmp31 = icmp eq i32 %tmp25, 100
  br i1 %tmp31, label %bb2, label %bb1
}

; Attribute group shared by both kernels: selects the fiji target CPU.
attributes #0 = { "target-cpu"="fiji" }

; TBAA metadata attached to every load and store in this file.
;   !0 - access tag: base type !1, access type !1, offset 0.
;   !1 - scalar type "int", child of !2.
;   !2 - scalar type "omnipotent char", child of the root !3.
;   !3 - TBAA root node.
!0 = !{!1, !1, i64 0}
!1 = !{!"int", !2, i64 0}
!2 = !{!"omnipotent char", !3, i64 0}
!3 = !{!"Simple C/C++ TBAA"}