; RUN: opt -S -mtriple=amdgcn-- -codegenprepare < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -codegenprepare < %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; This particular case will actually be worse in terms of code size
; from sinking into both.

; OPT-LABEL: @sink_ubfe_i32(
; OPT: entry:
; OPT-NEXT: br i1

; OPT: bb0:
; OPT: %0 = lshr i32 %arg1, 8
; OPT-NEXT: %val0 = and i32 %0, 255
; OPT: br label

; OPT: bb1:
; OPT: %1 = lshr i32 %arg1, 8
; OPT-NEXT: %val1 = and i32 %1, 127
; OPT: br label

; OPT: ret:
; OPT: store
; OPT: ret


; GCN-LABEL: {{^}}sink_ubfe_i32:
; GCN-NOT: lshr
; GCN: s_cbranch_scc{{[0-1]}}

; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70008
; GCN: BB0_3:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008

; GCN: buffer_store_dword
; GCN: s_endpgm
define amdgpu_kernel void @sink_ubfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
entry:
  %shr = lshr i32 %arg1, 8
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i32 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i32 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i32 %phi, i32 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @sink_sbfe_i32(
; OPT: entry:
; OPT-NEXT: br i1

; OPT: bb0:
; OPT: %0 = ashr i32 %arg1, 8
; OPT-NEXT: %val0 = and i32 %0, 255
; OPT: br label

; OPT: bb1:
; OPT: %1 = ashr i32 %arg1, 8
; OPT-NEXT: %val1 = and i32 %1, 127
; OPT: br label

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_sbfe_i32:
define amdgpu_kernel void @sink_sbfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
entry:
  %shr = ashr i32 %arg1, 8
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i32 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i32 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i32 %phi, i32 addrspace(1)* %out
  ret void
}


; OPT-LABEL: @sink_ubfe_i16(
; OPT: entry:
; OPT-NEXT: br i1

; OPT: bb0:
; OPT: %0 = lshr i16 %arg1, 4
; OPT-NEXT: %val0 = and i16 %0, 255
; OPT: br label

; OPT: bb1:
; OPT: %1 = lshr i16 %arg1, 4
; OPT-NEXT: %val1 = and i16 %1, 127
; OPT: br label

; OPT: ret:
; OPT: store
; OPT: ret

; For GFX8: since i16 is legal type, we cannot sink lshr into BBs.

; GCN-LABEL: {{^}}sink_ubfe_i16:
; GCN-NOT: lshr
; VI: s_load_dword [[ARG:s[0-9]+]], s[0:1], 0x2c
; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004
; GCN: s_cbranch_scc{{[0-1]}}

; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0x7f

; GCN: BB2_3:
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0xff

; GCN: buffer_store_short
; GCN: s_endpgm
define amdgpu_kernel void @sink_ubfe_i16(i16 addrspace(1)* %out, i16 %arg1) #0 {
entry:
  %shr = lshr i16 %arg1, 4
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i16 %shr, 255
  store volatile i16 0, i16 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i16 %shr, 127
  store volatile i16 0, i16 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i16 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i16 %phi, i16 addrspace(1)* %out
  ret void
}

; We don't really want to sink this one since it isn't reducible to a
; 32-bit BFE on one half of the integer.

; OPT-LABEL: @sink_ubfe_i64_span_midpoint(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1

; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 30
; OPT-NEXT: %val0 = and i64 %0, 255

; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 30
; OPT-NEXT: %val1 = and i64 %1, 127

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:

; GCN: s_cbranch_scc{{[0-1]}} BB3_2
; GCN: v_alignbit_b32 v[[LO:[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, 30
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7f, v[[LO]]

; GCN: BB3_3:
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]

; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
  %shr = lshr i64 %arg1, 30
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i64 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i64 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i64 %phi, i64 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @sink_ubfe_i64_low32(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1

; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 15
; OPT-NEXT: %val0 = and i64 %0, 255

; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 15
; OPT-NEXT: %val1 = and i64 %1, 127

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_low32:

; GCN: s_cbranch_scc{{[0-1]}} BB4_2

; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7000f

; GCN: BB4_3:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f

; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_low32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
  %shr = lshr i64 %arg1, 15
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i64 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i64 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i64 %phi, i64 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @sink_ubfe_i64_high32(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1

; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 35
; OPT-NEXT: %val0 = and i64 %0, 255

; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 35
; OPT-NEXT: %val1 = and i64 %1, 127

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_high32:
; GCN: s_cbranch_scc{{[0-1]}} BB5_2
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70003

; GCN: BB5_3:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003

; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_high32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
  %shr = lshr i64 %arg1, 35
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i64 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i64 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i64 %phi, i64 addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }