1; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI -check-prefix=OPT-CIVI %s
2; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI -check-prefix=OPT-CIVI %s
3; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-GFX9 %s
4; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s
5; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s
6; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false -mcpu=gfx900 -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
7
8; OPT-LABEL: @test_no_sink_flat_small_offset_i32(
9; OPT-CIVI: getelementptr i32, i32* %in
10; OPT-CIVI: br i1
11; OPT-CIVI-NOT: ptrtoint
12
13; OPT-GFX9: br
14; OPT-GFX9: %sunkaddr = getelementptr i8, i8* %0, i64 28
15; OPT-GFX9: %1 = bitcast i8* %sunkaddr to i32*
16; OPT-GFX9: load i32, i32* %1
17
18; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32:
19; GCN: flat_load_dword
20; GCN: {{^}}BB0_2:
; Kernel under test: the address %in.gep (7 x i32 past %in) is computed in
; %entry, but its only user is the load in %if, i.e. in a different basic
; block. The checks preceding this function look at where that address
; computation ends up after codegenprepare: the OPT-CIVI lines expect the GEP
; to stay ahead of the conditional branch, while the OPT-GFX9 lines expect a
; %sunkaddr with byte offset 28 (7 * 4) after the branch.
21define amdgpu_kernel void @test_no_sink_flat_small_offset_i32(i32* %out, i32* %in, i32 %cond) {
22entry:
; NOTE(review): index 999999 is presumably large enough that it can never be
; folded into a memory instruction's immediate offset - confirm.
23  %out.gep = getelementptr i32, i32* %out, i64 999999
24  %in.gep = getelementptr i32, i32* %in, i64 7
25  %tmp0 = icmp eq i32 %cond, 0
; %cond == 0 skips the load entirely and goes straight to %endif.
26  br i1 %tmp0, label %endif, label %if
27
28if:
; Reached only when %cond != 0; this load is the sole user of %in.gep.
29  %tmp1 = load i32, i32* %in.gep
30  br label %endif
31
32endif:
; %x is the loaded value when coming from %if, or 0 when the load was skipped.
33  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
34  store i32 %x, i32* %out.gep
35  br label %done
36
37done:
38  ret void
39}
40
41; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
42; OPT: getelementptr i32, i32* %out,
43; OPT-CI-NOT: getelementptr
44; OPT: br i1
45
46; OPT-CI: addrspacecast
47; OPT-CI: getelementptr
48; OPT-CI: bitcast
49; OPT: br label
50
51; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_global_i32:
52; CI: buffer_load_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
; Kernel under test: %in.gep (7 x i32 past the flat pointer %in) is cast to
; addrspace(1) in %entry, and the resulting %cast is only dereferenced in %if.
; The GEP, the addrspacecast and the load therefore start out split across
; two basic blocks. The CI llc check ahead of this function expects the load
; to become a buffer_load_dword carrying offset:28 (7 * 4 bytes).
53define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_global_i32(i32* %out, i32* %in, i32 %cond) {
54entry:
; NOTE(review): index 999999 is presumably large enough that it can never be
; folded into a memory instruction's immediate offset - confirm.
55  %out.gep = getelementptr i32, i32* %out, i64 999999
56  %in.gep = getelementptr i32, i32* %in, i64 7
57  %cast = addrspacecast i32* %in.gep to i32 addrspace(1)*
58  %tmp0 = icmp eq i32 %cond, 0
; %cond == 0 skips the load entirely and goes straight to %endif.
59  br i1 %tmp0, label %endif, label %if
60
61if:
; Reached only when %cond != 0; this load is the sole user of %cast.
62  %tmp1 = load i32, i32 addrspace(1)* %cast
63  br label %endif
64
65endif:
; %x is the loaded value when coming from %if, or 0 when the load was skipped.
66  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
67  store i32 %x, i32* %out.gep
68  br label %done
69
70done:
71  ret void
72}
73
74; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32(
75; OPT: getelementptr i32, i32* %out,
76; OPT-CI-NOT: getelementptr
77; OPT: br i1
78
79; OPT-CI: addrspacecast
80; OPT-CI: getelementptr
81; OPT-CI: bitcast
82; OPT: br label
83
84; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_constant_i32:
85; CI: s_load_dword {{s[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
; Kernel under test: same shape as the flat-to-global kernel, except that
; %in.gep is cast to addrspace(4) before being loaded in %if. The CI llc
; check ahead of this function expects the load to be selected as an
; s_load_dword.
86define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_constant_i32(i32* %out, i32* %in, i32 %cond) {
87entry:
; NOTE(review): index 999999 is presumably large enough that it can never be
; folded into a memory instruction's immediate offset - confirm.
88  %out.gep = getelementptr i32, i32* %out, i64 999999
89  %in.gep = getelementptr i32, i32* %in, i64 7
90  %cast = addrspacecast i32* %in.gep to i32 addrspace(4)*
91  %tmp0 = icmp eq i32 %cond, 0
; %cond == 0 skips the load entirely and goes straight to %endif.
92  br i1 %tmp0, label %endif, label %if
93
94if:
; Reached only when %cond != 0; this load is the sole user of %cast.
95  %tmp1 = load i32, i32 addrspace(4)* %cast
96  br label %endif
97
98endif:
; %x is the loaded value when coming from %if, or 0 when the load was skipped.
99  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
100  store i32 %x, i32* %out.gep
101  br label %done
102
103done:
104  ret void
105}
106
107; OPT-LABEL: @test_sink_flat_small_max_flat_offset(
108; OPT-CIVI: %in.gep = getelementptr i8, i8* %in, i64 4095
109; OPT-CIVI: br
110; OPT-CIVI-NOT: getelementptr
111; OPT-CIVI: load i8, i8* %in.gep
112
113; OPT-GFX9: br
114; OPT-GFX9: %sunkaddr = getelementptr i8, i8* %in, i64 4095
115; OPT-GFX9: load i8, i8* %sunkaddr
116
117; GCN-LABEL: {{^}}test_sink_flat_small_max_flat_offset:
118; GFX9: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
119; CIVI: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
; Kernel under test: a byte load through a flat pointer at constant offset
; 4095, with the GEP in %entry and the load in %if. Per the checks ahead of
; this function, gfx900 is expected to sink the GEP next to the load and emit
; flat_load_sbyte with offset:4095, while the bonaire/tonga runs are expected
; to keep the GEP in %entry and emit the load without an immediate offset.
120define amdgpu_kernel void @test_sink_flat_small_max_flat_offset(i32* %out, i8* %in) #1 {
121entry:
122  %out.gep = getelementptr i32, i32* %out, i32 1024
123  %in.gep = getelementptr i8, i8* %in, i64 4095
; NOTE(review): mbcnt.lo(-1, 0) presumably yields a per-lane index, which
; would make the branch below differ between lanes - confirm against the
; intrinsic's documentation.
124  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
125  %tmp0 = icmp eq i32 %tid, 0
; %tid == 0 skips the load entirely and goes straight to %endif.
126  br i1 %tmp0, label %endif, label %if
127
128if:
; Sole user of %in.gep; the byte is sign-extended, matching the
; flat_load_sbyte expected by the llc checks.
129  %tmp1 = load i8, i8* %in.gep
130  %tmp2 = sext i8 %tmp1 to i32
131  br label %endif
132
133endif:
; %x is the sign-extended byte when coming from %if, or 0 otherwise.
134  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
135  store i32 %x, i32* %out.gep
136  br label %done
137
138done:
139  ret void
140}
141
142; OPT-LABEL: @test_sink_flat_small_max_plus_1_flat_offset(
143; OPT: %in.gep = getelementptr i8, i8* %in, i64 4096
144; OPT: br
145; OPT-NOT: getelementptr
146; OPT: load i8, i8* %in.gep
147
148; GCN-LABEL: {{^}}test_sink_flat_small_max_plus_1_flat_offset:
149; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
; Kernel under test: identical in shape to the offset-4095 kernel, but the
; constant byte offset is 4096, one more than in that kernel. The checks
; ahead of this function expect every run, gfx900 included, to leave the GEP
; in %entry and to emit flat_load_sbyte without an immediate offset.
150define amdgpu_kernel void @test_sink_flat_small_max_plus_1_flat_offset(i32* %out, i8* %in) #1 {
151entry:
152  %out.gep = getelementptr i32, i32* %out, i64 99999
153  %in.gep = getelementptr i8, i8* %in, i64 4096
; NOTE(review): mbcnt.lo(-1, 0) presumably yields a per-lane index, which
; would make the branch below differ between lanes - confirm against the
; intrinsic's documentation.
154  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
155  %tmp0 = icmp eq i32 %tid, 0
; %tid == 0 skips the load entirely and goes straight to %endif.
156  br i1 %tmp0, label %endif, label %if
157
158if:
; Sole user of %in.gep; the loaded byte is sign-extended to i32.
159  %tmp1 = load i8, i8* %in.gep
160  %tmp2 = sext i8 %tmp1 to i32
161  br label %endif
162
163endif:
; %x is the sign-extended byte when coming from %if, or 0 otherwise.
164  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
165  store i32 %x, i32* %out.gep
166  br label %done
167
168done:
169  ret void
170}
171
172; OPT-LABEL: @test_no_sink_flat_reg_offset(
173; OPT: %in.gep = getelementptr i8, i8* %in, i64 %reg
174; OPT: br
175
176; OPT-NOT: getelementptr
177; OPT: load i8, i8* %in.gep
178
179; GCN-LABEL: {{^}}test_no_sink_flat_reg_offset:
180; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}}
; Kernel under test: the byte address is %in plus the run-time value %reg
; rather than a constant. The checks ahead of this function expect the GEP to
; remain in %entry on every run and the load to be emitted as flat_load_sbyte
; without an immediate offset.
181define amdgpu_kernel void @test_no_sink_flat_reg_offset(i32* %out, i8* %in, i64 %reg) #1 {
182entry:
183  %out.gep = getelementptr i32, i32* %out, i32 1024
184  %in.gep = getelementptr i8, i8* %in, i64 %reg
; NOTE(review): mbcnt.lo(-1, 0) presumably yields a per-lane index, which
; would make the branch below differ between lanes - confirm against the
; intrinsic's documentation.
185  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
186  %tmp0 = icmp eq i32 %tid, 0
; %tid == 0 skips the load entirely and goes straight to %endif.
187  br i1 %tmp0, label %endif, label %if
188
189if:
; Sole user of %in.gep; the loaded byte is sign-extended to i32.
190  %tmp1 = load i8, i8* %in.gep
191  %tmp2 = sext i8 %tmp1 to i32
192  br label %endif
193
194endif:
; %x is the sign-extended byte when coming from %if, or 0 otherwise.
195  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
196  store i32 %x, i32* %out.gep
197  br label %done
198
199done:
200  ret void
201}
202
; Intrinsic called by the three i8 flat-offset kernels to produce %tid.
203declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
204
; #0 is attached to the intrinsic declaration and to each of its call sites.
205attributes #0 = { nounwind readnone }
; #1 is attached to the three kernels that call the intrinsic.
206attributes #1 = { nounwind }
; NOTE(review): #2 is not referenced anywhere in the visible part of this
; file - confirm whether it is still needed.
207attributes #2 = { nounwind argmemonly }
208