1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -amdgpu-load-store-vectorizer=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
3
; Default-calling-convention case: calls llvm.amdgcn.ubfe.i32 with source,
; offset and width all taken from the function arguments. The expected output
; is a single v_bfe_u32 on v0, v1, v2 followed by the return.
; Despite "bfe_i32" in the name, the intrinsic exercised is the unsigned ubfe.
4define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) #0 {
5; GFX6-LABEL: v_bfe_i32_arg_arg_arg:
6; GFX6:       ; %bb.0:
7; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8; GFX6-NEXT:    v_bfe_u32 v0, v0, v1, v2
9; GFX6-NEXT:    s_setpc_b64 s[30:31]
10  %bfe_i32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
11  ret i32 %bfe_i32
12}
13
; amdgpu_ps case with all three operands passed inreg. The expected output
; masks the offset with 63, shifts the width left by 16, ORs the two into one
; packed operand, and feeds that operand to s_bfe_u32.
; Despite "bfe_i32" in the name, the intrinsic exercised is the unsigned ubfe.
14define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
15; GFX6-LABEL: s_bfe_i32_arg_arg_arg:
16; GFX6:       ; %bb.0:
17; GFX6-NEXT:    s_and_b32 s1, s1, 63
18; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
19; GFX6-NEXT:    s_or_b32 s1, s1, s2
20; GFX6-NEXT:    s_bfe_u32 s0, s0, s1
21; GFX6-NEXT:    ; return to shader part epilog
22  %bfe_i32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
23  ret i32 %bfe_i32
24}
25
26; TODO: Need to expand this.
27; define i64 @v_bfe_i64_arg_arg_arg(i64 %src0, i32 %src1, i32 %src2) #0 {
28;   %bfe_i64 = call i64 @llvm.amdgcn.ubfe.i64(i64 %src0, i32 %src1, i32 %src2)
29;   ret i64 %bfe_i64
30; }
31
; 64-bit amdgpu_ps case: calls llvm.amdgcn.ubfe.i64 with an i64 source and i32
; offset/width, all inreg. The expected output builds the packed operand the
; same way as the 32-bit scalar case (offset & 63, width << 16, OR) and then
; uses s_bfe_u64 on the s[0:1] register pair.
; Note: the result value is named %bfe_i32 although its type is i64.
32define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
33; GFX6-LABEL: s_bfe_i64_arg_arg_arg:
34; GFX6:       ; %bb.0:
35; GFX6-NEXT:    s_and_b32 s2, s2, 63
36; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
37; GFX6-NEXT:    s_or_b32 s2, s2, s3
38; GFX6-NEXT:    s_bfe_u64 s[0:1], s[0:1], s2
39; GFX6-NEXT:    ; return to shader part epilog
40  %bfe_i32 = call i64 @llvm.amdgcn.ubfe.i64(i64 %src0, i32 %src1, i32 %src2)
41  ret i64 %bfe_i32
42}
43
; Kernel case: ubfe.i32 on kernel arguments, result stored to %out.
; The expected output loads the arguments with scalar loads, builds the packed
; offset/width operand, runs s_bfe_u32, and stores the result with
; buffer_store_dword.
; NOTE(review): the call passes %src1 as BOTH offset and width and never uses
; %src2. The expected output matches this (s_and_b32 and s_lshl_b32 both read
; the same loaded value). Given the function name this looks unintended;
; confirm before changing, because the assertions would have to be regenerated.
44define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 {
45; GFX6-LABEL: bfe_u32_arg_arg_arg:
46; GFX6:       ; %bb.0:
47; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
48; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
49; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
50; GFX6-NEXT:    s_mov_b32 s6, -1
51; GFX6-NEXT:    s_mov_b32 s7, 0xf000
52; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
53; GFX6-NEXT:    s_and_b32 s1, s0, 63
54; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
55; GFX6-NEXT:    s_or_b32 s0, s1, s0
56; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
57; GFX6-NEXT:    v_mov_b32_e32 v0, s0
58; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
59; GFX6-NEXT:    s_endpgm
60  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src1)
61  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
62  ret void
63}
64
; Kernel case with a constant width: source and offset come from kernel
; arguments, width is the immediate 123. The expected output masks the offset
; with 63 and ORs in 0x7b0000 (123 << 16) instead of emitting a shift.
65define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
66; GFX6-LABEL: bfe_u32_arg_arg_imm:
67; GFX6:       ; %bb.0:
68; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
69; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
70; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
71; GFX6-NEXT:    s_mov_b32 s6, -1
72; GFX6-NEXT:    s_mov_b32 s7, 0xf000
73; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
74; GFX6-NEXT:    s_and_b32 s0, s0, 63
75; GFX6-NEXT:    s_or_b32 s0, s0, 0x7b0000
76; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
77; GFX6-NEXT:    v_mov_b32_e32 v0, s0
78; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
79; GFX6-NEXT:    s_endpgm
80  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123)
81  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
82  ret void
83}
84
; Kernel case with a constant offset: source and width come from kernel
; arguments, offset is the immediate 123. The expected output shifts the width
; left by 16 and ORs in 59, i.e. the offset already masked (123 & 63 = 59).
85define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
86; GFX6-LABEL: bfe_u32_arg_imm_arg:
87; GFX6:       ; %bb.0:
88; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
89; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
90; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
91; GFX6-NEXT:    s_mov_b32 s6, -1
92; GFX6-NEXT:    s_mov_b32 s7, 0xf000
93; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
94; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
95; GFX6-NEXT:    s_or_b32 s0, 59, s0
96; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
97; GFX6-NEXT:    v_mov_b32_e32 v0, s0
98; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
99; GFX6-NEXT:    s_endpgm
100  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2)
101  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
102  ret void
103}
104
; Kernel case with a constant source: the source is the immediate 123 while
; offset and width come from kernel arguments. The expected output builds the
; packed operand from the two loaded values (offset & 63, width << 16, OR) and
; uses 0x7b directly as the source operand of s_bfe_u32.
105define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
106; GFX6-LABEL: bfe_u32_imm_arg_arg:
107; GFX6:       ; %bb.0:
108; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
109; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
110; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
111; GFX6-NEXT:    s_mov_b32 s6, -1
112; GFX6-NEXT:    s_mov_b32 s7, 0xf000
113; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
114; GFX6-NEXT:    s_and_b32 s1, s2, 63
115; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
116; GFX6-NEXT:    s_or_b32 s0, s1, s0
117; GFX6-NEXT:    s_bfe_u32 s0, 0x7b, s0
118; GFX6-NEXT:    v_mov_b32_e32 v0, s0
119; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
120; GFX6-NEXT:    s_endpgm
121  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2)
122  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
123  ret void
124}
125
; Zero-width case with a register offset: width is the immediate 0, source and
; offset come from kernel arguments. The expected output only masks the offset
; with 63 (no shift or OR for the width) and still emits s_bfe_u32.
126define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
127; GFX6-LABEL: bfe_u32_arg_0_width_reg_offset:
128; GFX6:       ; %bb.0:
129; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
130; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
131; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
132; GFX6-NEXT:    s_mov_b32 s6, -1
133; GFX6-NEXT:    s_mov_b32 s7, 0xf000
134; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
135; GFX6-NEXT:    s_and_b32 s0, s0, 63
136; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
137; GFX6-NEXT:    v_mov_b32_e32 v0, s0
138; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
139; GFX6-NEXT:    s_endpgm
140  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0)
141  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
142  ret void
143}
144
; Zero-width case with an immediate offset: offset 8 and width 0 are both
; constants, only the source comes from a kernel argument. The expected output
; emits s_bfe_u32 with the packed operand 8 (offset 8, width field 0).
; %src1 is declared but not used by the call, and the expected output loads
; only one dword argument.
145define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
146; GFX6-LABEL: bfe_u32_arg_0_width_imm_offset:
147; GFX6:       ; %bb.0:
148; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
149; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
150; GFX6-NEXT:    s_mov_b32 s6, -1
151; GFX6-NEXT:    s_mov_b32 s7, 0xf000
152; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
153; GFX6-NEXT:    s_bfe_u32 s0, s0, 8
154; GFX6-NEXT:    v_mov_b32_e32 v0, s0
155; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
156; GFX6-NEXT:    s_endpgm
157  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0)
158  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
159  ret void
160}
161
; ubfe applied to a zero-extended i8 load: loads an i8 from %in, zero-extends it
; to i32, then extracts 8 bits at offset 0. The expected output uses
; buffer_load_ubyte for the load and the vector form v_bfe_u32 v0, v0, 0, 8;
; the bfe is still present even though the input is already an 8-bit value.
162define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
163; GFX6-LABEL: bfe_u32_zextload_i8:
164; GFX6:       ; %bb.0:
165; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
166; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
167; GFX6-NEXT:    s_mov_b32 s2, -1
168; GFX6-NEXT:    s_mov_b32 s3, 0xf000
169; GFX6-NEXT:    s_mov_b64 s[6:7], s[2:3]
170; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
171; GFX6-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
172; GFX6-NEXT:    s_waitcnt vmcnt(0)
173; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 8
174; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
175; GFX6-NEXT:    s_endpgm
176  %load = load i8, i8 addrspace(1)* %in
177  %ext = zext i8 %load to i32
178  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
179  store i32 %bfe, i32 addrspace(1)* %out, align 4
180  ret void
181}
182
183; FIXME: Should be using s_add_i32. (Possibly stale: the expected output below already contains s_add_i32.)
; ubfe on a value already masked to 8 bits: loads an i32, adds 1, ANDs with
; 255, then extracts 8 bits at offset 0. The expected output keeps all three
; steps: s_add_i32, s_and_b32 with 0xff, and s_bfe_u32 with the packed operand
; 0x80000 (width 8 << 16, offset 0).
184define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
185; GFX6-LABEL: bfe_u32_zext_in_reg_i8:
186; GFX6:       ; %bb.0:
187; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
188; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
189; GFX6-NEXT:    s_mov_b32 s6, -1
190; GFX6-NEXT:    s_mov_b32 s7, 0xf000
191; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
192; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
193; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
194; GFX6-NEXT:    s_add_i32 s0, s0, 1
195; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
196; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80000
197; GFX6-NEXT:    v_mov_b32_e32 v0, s0
198; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
199; GFX6-NEXT:    s_endpgm
200  %load = load i32, i32 addrspace(1)* %in, align 4
201  %add = add i32 %load, 1
202  %ext = and i32 %add, 255
203  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
204  store i32 %bfe, i32 addrspace(1)* %out, align 4
205  ret void
206}
207
; 16-bit variant of the masked-input case: loads an i32, adds 1, ANDs with
; 65535, then extracts 16 bits at offset 0. The expected output keeps the
; s_add_i32, the s_and_b32 with 0xffff, and an s_bfe_u32 with the packed
; operand 0x100000 (width 16 << 16, offset 0).
208define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
209; GFX6-LABEL: bfe_u32_zext_in_reg_i16:
210; GFX6:       ; %bb.0:
211; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
212; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
213; GFX6-NEXT:    s_mov_b32 s6, -1
214; GFX6-NEXT:    s_mov_b32 s7, 0xf000
215; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
216; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
217; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
218; GFX6-NEXT:    s_add_i32 s0, s0, 1
219; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
220; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
221; GFX6-NEXT:    v_mov_b32_e32 v0, s0
222; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
223; GFX6-NEXT:    s_endpgm
224  %load = load i32, i32 addrspace(1)* %in, align 4
225  %add = add i32 %load, 1
226  %ext = and i32 %add, 65535
227  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16)
228  store i32 %bfe, i32 addrspace(1)* %out, align 4
229  ret void
230}
231
; Masked-to-8-bit input with a non-zero offset: loads an i32, adds 1, ANDs with
; 255, then extracts 8 bits starting at offset 1. The expected output keeps the
; add and the mask and emits s_bfe_u32 with the packed operand 0x80001
; (width 8 << 16, offset 1).
232define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
233; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
234; GFX6:       ; %bb.0:
235; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
236; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
237; GFX6-NEXT:    s_mov_b32 s6, -1
238; GFX6-NEXT:    s_mov_b32 s7, 0xf000
239; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
240; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
241; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
242; GFX6-NEXT:    s_add_i32 s0, s0, 1
243; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
244; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80001
245; GFX6-NEXT:    v_mov_b32_e32 v0, s0
246; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
247; GFX6-NEXT:    s_endpgm
248  %load = load i32, i32 addrspace(1)* %in, align 4
249  %add = add i32 %load, 1
250  %ext = and i32 %add, 255
251  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8)
252  store i32 %bfe, i32 addrspace(1)* %out, align 4
253  ret void
254}
255
; Masked-to-8-bit input with offset 3: loads an i32, adds 1, ANDs with 255,
; then extracts 8 bits starting at offset 3. The expected output keeps the add
; and the mask and emits s_bfe_u32 with the packed operand 0x80003
; (width 8 << 16, offset 3).
256define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
257; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
258; GFX6:       ; %bb.0:
259; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
260; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
261; GFX6-NEXT:    s_mov_b32 s6, -1
262; GFX6-NEXT:    s_mov_b32 s7, 0xf000
263; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
264; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
265; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
266; GFX6-NEXT:    s_add_i32 s0, s0, 1
267; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
268; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80003
269; GFX6-NEXT:    v_mov_b32_e32 v0, s0
270; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
271; GFX6-NEXT:    s_endpgm
272  %load = load i32, i32 addrspace(1)* %in, align 4
273  %add = add i32 %load, 1
274  %ext = and i32 %add, 255
275  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8)
276  store i32 %bfe, i32 addrspace(1)* %out, align 4
277  ret void
278}
279
; Masked-to-8-bit input with offset 7: loads an i32, adds 1, ANDs with 255,
; then extracts 8 bits starting at offset 7. The expected output keeps the add
; and the mask and emits s_bfe_u32 with the packed operand 0x80007
; (width 8 << 16, offset 7).
280define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
281; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
282; GFX6:       ; %bb.0:
283; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
284; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
285; GFX6-NEXT:    s_mov_b32 s6, -1
286; GFX6-NEXT:    s_mov_b32 s7, 0xf000
287; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
288; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
289; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
290; GFX6-NEXT:    s_add_i32 s0, s0, 1
291; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
292; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80007
293; GFX6-NEXT:    v_mov_b32_e32 v0, s0
294; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
295; GFX6-NEXT:    s_endpgm
296  %load = load i32, i32 addrspace(1)* %in, align 4
297  %add = add i32 %load, 1
298  %ext = and i32 %add, 255
299  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8)
300  store i32 %bfe, i32 addrspace(1)* %out, align 4
301  ret void
302}
303
; Masked-to-16-bit input, extracting the upper byte of that half-word: loads an
; i32, adds 1, ANDs with 65535, then extracts 8 bits starting at offset 8. The
; expected output keeps the add and the 0xffff mask and emits s_bfe_u32 with
; the packed operand 0x80008 (width 8 << 16, offset 8).
304define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
305; GFX6-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
306; GFX6:       ; %bb.0:
307; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
308; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
309; GFX6-NEXT:    s_mov_b32 s6, -1
310; GFX6-NEXT:    s_mov_b32 s7, 0xf000
311; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
312; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
313; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
314; GFX6-NEXT:    s_add_i32 s0, s0, 1
315; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
316; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80008
317; GFX6-NEXT:    v_mov_b32_e32 v0, s0
318; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
319; GFX6-NEXT:    s_endpgm
320  %load = load i32, i32 addrspace(1)* %in, align 4
321  %add = add i32 %load, 1
322  %ext = and i32 %add, 65535
323  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8)
324  store i32 %bfe, i32 addrspace(1)* %out, align 4
325  ret void
326}
327
; Extracts the lowest bit of a loaded i32 (offset 0, width 1) and stores it.
; The expected output is a scalar load followed by s_bfe_u32 with the packed
; operand 0x10000 (width 1 << 16, offset 0).
328define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
329; GFX6-LABEL: bfe_u32_test_1:
330; GFX6:       ; %bb.0:
331; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
332; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
333; GFX6-NEXT:    s_mov_b32 s6, -1
334; GFX6-NEXT:    s_mov_b32 s7, 0xf000
335; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
336; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
337; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
338; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x10000
339; GFX6-NEXT:    v_mov_b32_e32 v0, s0
340; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
341; GFX6-NEXT:    s_endpgm
342  %x = load i32, i32 addrspace(1)* %in, align 4
343  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1)
344  store i32 %bfe, i32 addrspace(1)* %out, align 4
345  ret void
346}
347
; ubfe of the low 8 bits of a value that was shifted left by 31. The expected
; output keeps the s_lshl_b32 by 31 and then emits s_bfe_u32 with the packed
; operand 0x80000 (width 8 << 16, offset 0); the shift is not folded into the
; extract.
348define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
349; GFX6-LABEL: bfe_u32_test_2:
350; GFX6:       ; %bb.0:
351; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
352; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
353; GFX6-NEXT:    s_mov_b32 s6, -1
354; GFX6-NEXT:    s_mov_b32 s7, 0xf000
355; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
356; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
357; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
358; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
359; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80000
360; GFX6-NEXT:    v_mov_b32_e32 v0, s0
361; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
362; GFX6-NEXT:    s_endpgm
363  %x = load i32, i32 addrspace(1)* %in, align 4
364  %shl = shl i32 %x, 31
365  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8)
366  store i32 %bfe, i32 addrspace(1)* %out, align 4
367  ret void
368}
369
; ubfe of bit 0 (offset 0, width 1) of a value that was shifted left by 31.
; The expected output keeps the s_lshl_b32 by 31 and then emits s_bfe_u32 with
; the packed operand 0x10000 (width 1 << 16, offset 0).
370define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
371; GFX6-LABEL: bfe_u32_test_3:
372; GFX6:       ; %bb.0:
373; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
374; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
375; GFX6-NEXT:    s_mov_b32 s6, -1
376; GFX6-NEXT:    s_mov_b32 s7, 0xf000
377; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
378; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
379; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
380; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
381; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x10000
382; GFX6-NEXT:    v_mov_b32_e32 v0, s0
383; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
384; GFX6-NEXT:    s_endpgm
385  %x = load i32, i32 addrspace(1)* %in, align 4
386  %shl = shl i32 %x, 31
387  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1)
388  store i32 %bfe, i32 addrspace(1)* %out, align 4
389  ret void
390}
391
; ubfe of bit 31 (offset 31, width 1) after a shl-by-31 / lshr-by-31 pair.
; The expected output keeps both shifts (s_lshl_b32, s_lshr_b32) and then emits
; s_bfe_u32 with the packed operand 0x1001f (width 1 << 16, offset 31).
392define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
393; GFX6-LABEL: bfe_u32_test_4:
394; GFX6:       ; %bb.0:
395; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
396; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
397; GFX6-NEXT:    s_mov_b32 s6, -1
398; GFX6-NEXT:    s_mov_b32 s7, 0xf000
399; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
400; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
401; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
402; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
403; GFX6-NEXT:    s_lshr_b32 s0, s0, 31
404; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
405; GFX6-NEXT:    v_mov_b32_e32 v0, s0
406; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
407; GFX6-NEXT:    s_endpgm
408  %x = load i32, i32 addrspace(1)* %in, align 4
409  %shl = shl i32 %x, 31
410  %shr = lshr i32 %shl, 31
411  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
412  store i32 %bfe, i32 addrspace(1)* %out, align 4
413  ret void
414}
415
; ubfe of bit 0 (offset 0, width 1) after a shl-by-31 / ashr-by-31 pair.
; The expected output keeps both shifts (s_lshl_b32, s_ashr_i32) and then emits
; s_bfe_u32 with the packed operand 0x10000 (width 1 << 16, offset 0).
416define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
417; GFX6-LABEL: bfe_u32_test_5:
418; GFX6:       ; %bb.0:
419; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
420; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
421; GFX6-NEXT:    s_mov_b32 s6, -1
422; GFX6-NEXT:    s_mov_b32 s7, 0xf000
423; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
424; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
425; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
426; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
427; GFX6-NEXT:    s_ashr_i32 s0, s0, 31
428; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x10000
429; GFX6-NEXT:    v_mov_b32_e32 v0, s0
430; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
431; GFX6-NEXT:    s_endpgm
432  %x = load i32, i32 addrspace(1)* %in, align 4
433  %shl = shl i32 %x, 31
434  %shr = ashr i32 %shl, 31
435  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1)
436  store i32 %bfe, i32 addrspace(1)* %out, align 4
437  ret void
438}
439
; ubfe of 31 bits starting at offset 1 from a value shifted left by 31.
; The expected output keeps the s_lshl_b32 by 31 and then emits s_bfe_u32 with
; the packed operand 0x1f0001 (width 31 << 16, offset 1).
440define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
441; GFX6-LABEL: bfe_u32_test_6:
442; GFX6:       ; %bb.0:
443; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
444; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
445; GFX6-NEXT:    s_mov_b32 s6, -1
446; GFX6-NEXT:    s_mov_b32 s7, 0xf000
447; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
448; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
449; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
450; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
451; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1f0001
452; GFX6-NEXT:    v_mov_b32_e32 v0, s0
453; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
454; GFX6-NEXT:    s_endpgm
455  %x = load i32, i32 addrspace(1)* %in, align 4
456  %shl = shl i32 %x, 31
457  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31)
458  store i32 %bfe, i32 addrspace(1)* %out, align 4
459  ret void
460}
461
; ubfe of 31 bits starting at offset 0 from a value shifted left by 31.
; The expected output keeps the s_lshl_b32 by 31 and then emits s_bfe_u32 with
; the packed operand 0x1f0000 (width 31 << 16, offset 0).
462define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
463; GFX6-LABEL: bfe_u32_test_7:
464; GFX6:       ; %bb.0:
465; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
466; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
467; GFX6-NEXT:    s_mov_b32 s6, -1
468; GFX6-NEXT:    s_mov_b32 s7, 0xf000
469; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
470; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
471; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
472; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
473; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1f0000
474; GFX6-NEXT:    v_mov_b32_e32 v0, s0
475; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
476; GFX6-NEXT:    s_endpgm
477  %x = load i32, i32 addrspace(1)* %in, align 4
478  %shl = shl i32 %x, 31
479  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31)
480  store i32 %bfe, i32 addrspace(1)* %out, align 4
481  ret void
482}
483
; ubfe of bit 31 (offset 31, width 1) from a value shifted left by 31.
; The expected output keeps the s_lshl_b32 by 31 and then emits s_bfe_u32 with
; the packed operand 0x1001f (width 1 << 16, offset 31).
484define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
485; GFX6-LABEL: bfe_u32_test_8:
486; GFX6:       ; %bb.0:
487; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
488; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
489; GFX6-NEXT:    s_mov_b32 s6, -1
490; GFX6-NEXT:    s_mov_b32 s7, 0xf000
491; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
492; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
493; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
494; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
495; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
496; GFX6-NEXT:    v_mov_b32_e32 v0, s0
497; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
498; GFX6-NEXT:    s_endpgm
499  %x = load i32, i32 addrspace(1)* %in, align 4
500  %shl = shl i32 %x, 31
501  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
502  store i32 %bfe, i32 addrspace(1)* %out, align 4
503  ret void
504}
505
; Extracts the top bit of a loaded i32 (offset 31, width 1) with no preceding
; shift. The expected output is a scalar load followed by s_bfe_u32 with the
; packed operand 0x1001f (width 1 << 16, offset 31).
506define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
507; GFX6-LABEL: bfe_u32_test_9:
508; GFX6:       ; %bb.0:
509; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
510; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
511; GFX6-NEXT:    s_mov_b32 s6, -1
512; GFX6-NEXT:    s_mov_b32 s7, 0xf000
513; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
514; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
515; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
516; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
517; GFX6-NEXT:    v_mov_b32_e32 v0, s0
518; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
519; GFX6-NEXT:    s_endpgm
520  %x = load i32, i32 addrspace(1)* %in, align 4
521  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1)
522  store i32 %bfe, i32 addrspace(1)* %out, align 4
523  ret void
524}
525
; Extracts the upper 31 bits of a loaded i32 (offset 1, width 31). The
; expected output is a scalar load followed by s_bfe_u32 with the packed
; operand 0x1f0001 (width 31 << 16, offset 1).
526define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
527; GFX6-LABEL: bfe_u32_test_10:
528; GFX6:       ; %bb.0:
529; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
530; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
531; GFX6-NEXT:    s_mov_b32 s6, -1
532; GFX6-NEXT:    s_mov_b32 s7, 0xf000
533; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
534; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
535; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
536; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1f0001
537; GFX6-NEXT:    v_mov_b32_e32 v0, s0
538; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
539; GFX6-NEXT:    s_endpgm
540  %x = load i32, i32 addrspace(1)* %in, align 4
541  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31)
542  store i32 %bfe, i32 addrspace(1)* %out, align 4
543  ret void
544}
545
; Extracts the upper 24 bits of a loaded i32 (offset 8, width 24). The
; expected output is a scalar load followed by s_bfe_u32 with the packed
; operand 0x180008 (width 24 << 16, offset 8).
546define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
547; GFX6-LABEL: bfe_u32_test_11:
548; GFX6:       ; %bb.0:
549; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
550; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
551; GFX6-NEXT:    s_mov_b32 s6, -1
552; GFX6-NEXT:    s_mov_b32 s7, 0xf000
553; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
554; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
555; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
556; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x180008
557; GFX6-NEXT:    v_mov_b32_e32 v0, s0
558; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
559; GFX6-NEXT:    s_endpgm
560  %x = load i32, i32 addrspace(1)* %in, align 4
561  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24)
562  store i32 %bfe, i32 addrspace(1)* %out, align 4
563  ret void
564}
565
; Extracts the top byte of a loaded i32 (offset 24, width 8). The expected
; output is a scalar load followed by s_bfe_u32 with the packed operand
; 0x80018 (width 8 << 16, offset 24).
566define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
567; GFX6-LABEL: bfe_u32_test_12:
568; GFX6:       ; %bb.0:
569; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
570; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
571; GFX6-NEXT:    s_mov_b32 s6, -1
572; GFX6-NEXT:    s_mov_b32 s7, 0xf000
573; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
574; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
575; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
576; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80018
577; GFX6-NEXT:    v_mov_b32_e32 v0, s0
578; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
579; GFX6-NEXT:    s_endpgm
580  %x = load i32, i32 addrspace(1)* %in, align 4
581  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8)
582  store i32 %bfe, i32 addrspace(1)* %out, align 4
583  ret void
584}
585
586; Not a FileCheck directive (no check prefix), so it is never matched; reference pattern only: V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; ubfe of bit 31 (offset 31, width 1) of a value produced by an arithmetic
; shift right by 31. The expected output keeps the s_ashr_i32 and then emits
; s_bfe_u32 with the packed operand 0x1001f (width 1 << 16, offset 31).
; Note: the intermediate value is named %shl although it is produced by ashr.
; `ret void` sits on its own line to match the other functions in this file;
; only whitespace changed, the instruction tokens are the same.
587define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
588; GFX6-LABEL: bfe_u32_test_13:
589; GFX6:       ; %bb.0:
590; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
591; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
592; GFX6-NEXT:    s_mov_b32 s6, -1
593; GFX6-NEXT:    s_mov_b32 s7, 0xf000
594; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
595; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
596; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
597; GFX6-NEXT:    s_ashr_i32 s0, s0, 31
598; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
599; GFX6-NEXT:    v_mov_b32_e32 v0, s0
600; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
601; GFX6-NEXT:    s_endpgm
602  %x = load i32, i32 addrspace(1)* %in, align 4
603  %shl = ashr i32 %x, 31
604  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
605  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
606}
607
; ubfe of bit 31 (offset 31, width 1) of a value produced by a logical shift
; right by 31. The expected output keeps the s_lshr_b32 and then emits
; s_bfe_u32 with the packed operand 0x1001f (width 1 << 16, offset 31).
; Note: the intermediate value is named %shl although it is produced by lshr.
; `ret void` sits on its own line to match the other functions in this file;
; only whitespace changed, the instruction tokens are the same.
608define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
609; GFX6-LABEL: bfe_u32_test_14:
610; GFX6:       ; %bb.0:
611; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
612; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
613; GFX6-NEXT:    s_mov_b32 s6, -1
614; GFX6-NEXT:    s_mov_b32 s7, 0xf000
615; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
616; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
617; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
618; GFX6-NEXT:    s_lshr_b32 s0, s0, 31
619; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
620; GFX6-NEXT:    v_mov_b32_e32 v0, s0
621; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
622; GFX6-NEXT:    s_endpgm
623  %x = load i32, i32 addrspace(1)* %in, align 4
624  %shl = lshr i32 %x, 31
625  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
626  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
627}
628
; All-constant ubfe: source 0, offset 0, width 0. Despite the "constant_fold"
; name, the expected output still contains an s_bfe_u32 on immediate operands
; (0, 0) whose result is moved to a VGPR and stored; the call is not replaced
; by a constant in the checked output.
629define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
630; GFX6-LABEL: bfe_u32_constant_fold_test_0:
631; GFX6:       ; %bb.0:
632; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
633; GFX6-NEXT:    s_bfe_u32 s2, 0, 0
634; GFX6-NEXT:    v_mov_b32_e32 v0, s2
635; GFX6-NEXT:    s_mov_b32 s2, -1
636; GFX6-NEXT:    s_mov_b32 s3, 0xf000
637; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
638; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
639; GFX6-NEXT:    s_endpgm
640  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
641  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
642  ret void
643}
644
; All-constant ubfe: source 12334 (0x302e), offset 0, width 0. The expected
; output still contains s_bfe_u32 with the immediates 0x302e and 0; the call is
; not replaced by a constant in the checked output.
645define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
646; GFX6-LABEL: bfe_u32_constant_fold_test_1:
647; GFX6:       ; %bb.0:
648; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
649; GFX6-NEXT:    s_bfe_u32 s2, 0x302e, 0
650; GFX6-NEXT:    v_mov_b32_e32 v0, s2
651; GFX6-NEXT:    s_mov_b32 s2, -1
652; GFX6-NEXT:    s_mov_b32 s3, 0xf000
653; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
654; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
655; GFX6-NEXT:    s_endpgm
656  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0)
657  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
658  ret void
659}
660
; All-constant ubfe: source 0, offset 0, width 1. The expected output still
; contains s_bfe_u32 with source 0 and the packed operand 0x10000
; (width 1 << 16, offset 0).
661define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
662; GFX6-LABEL: bfe_u32_constant_fold_test_2:
663; GFX6:       ; %bb.0:
664; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
665; GFX6-NEXT:    s_bfe_u32 s2, 0, 0x10000
666; GFX6-NEXT:    v_mov_b32_e32 v0, s2
667; GFX6-NEXT:    s_mov_b32 s2, -1
668; GFX6-NEXT:    s_mov_b32 s3, 0xf000
669; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
670; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
671; GFX6-NEXT:    s_endpgm
672  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1)
673  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
674  ret void
675}
676
; All-constant ubfe: source 1, offset 0, width 1. The expected output still
; contains s_bfe_u32 with source 1 and the packed operand 0x10000
; (width 1 << 16, offset 0).
677define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
678; GFX6-LABEL: bfe_u32_constant_fold_test_3:
679; GFX6:       ; %bb.0:
680; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
681; GFX6-NEXT:    s_bfe_u32 s2, 1, 0x10000
682; GFX6-NEXT:    v_mov_b32_e32 v0, s2
683; GFX6-NEXT:    s_mov_b32 s2, -1
684; GFX6-NEXT:    s_mov_b32 s3, 0xf000
685; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
686; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
687; GFX6-NEXT:    s_endpgm
688  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1)
689  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
690  ret void
691}
692
; All-constant ubfe: source 4294967295 (all ones, printed as -1 in the expected
; output), offset 0, width 1. The expected output still contains s_bfe_u32 with
; the packed operand 0x10000; here the result is written to s3 before s3 is
; overwritten with 0xf000.
693define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
694; GFX6-LABEL: bfe_u32_constant_fold_test_4:
695; GFX6:       ; %bb.0:
696; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
697; GFX6-NEXT:    s_bfe_u32 s3, -1, 0x10000
698; GFX6-NEXT:    v_mov_b32_e32 v0, s3
699; GFX6-NEXT:    s_mov_b32 s2, -1
700; GFX6-NEXT:    s_mov_b32 s3, 0xf000
701; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
702; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
703; GFX6-NEXT:    s_endpgm
704  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1)
705  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
706  ret void
707}
708
; All-constant ubfe: source 128 (0x80), offset 7, width 1. The expected output
; first materializes the packed operand 0x10007 (width 1 << 16, offset 7) with
; s_mov_b32 and then emits s_bfe_u32 with the immediate source 0x80.
709define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
710; GFX6-LABEL: bfe_u32_constant_fold_test_5:
711; GFX6:       ; %bb.0:
712; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
713; GFX6-NEXT:    s_mov_b32 s2, 0x10007
714; GFX6-NEXT:    s_bfe_u32 s2, 0x80, s2
715; GFX6-NEXT:    v_mov_b32_e32 v0, s2
716; GFX6-NEXT:    s_mov_b32 s2, -1
717; GFX6-NEXT:    s_mov_b32 s3, 0xf000
718; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
719; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
720; GFX6-NEXT:    s_endpgm
721  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1)
722  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
723  ret void
724}
725
; All-constant ubfe: source 128 (0x80), offset 0, width 8. The expected output
; first materializes the packed operand 0x80000 (width 8 << 16, offset 0) with
; s_mov_b32 and then emits s_bfe_u32 with the immediate source 0x80.
726define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
727; GFX6-LABEL: bfe_u32_constant_fold_test_6:
728; GFX6:       ; %bb.0:
729; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
730; GFX6-NEXT:    s_mov_b32 s2, 0x80000
731; GFX6-NEXT:    s_bfe_u32 s2, 0x80, s2
732; GFX6-NEXT:    v_mov_b32_e32 v0, s2
733; GFX6-NEXT:    s_mov_b32 s2, -1
734; GFX6-NEXT:    s_mov_b32 s3, 0xf000
735; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
736; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
737; GFX6-NEXT:    s_endpgm
738  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8)
739  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
740  ret void
741}
742
; All-constant ubfe: source 127 (0x7f), offset 0, width 8. The expected output
; first materializes the packed operand 0x80000 (width 8 << 16, offset 0) with
; s_mov_b32 and then emits s_bfe_u32 with the immediate source 0x7f.
743define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
744; GFX6-LABEL: bfe_u32_constant_fold_test_7:
745; GFX6:       ; %bb.0:
746; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
747; GFX6-NEXT:    s_mov_b32 s2, 0x80000
748; GFX6-NEXT:    s_bfe_u32 s2, 0x7f, s2
749; GFX6-NEXT:    v_mov_b32_e32 v0, s2
750; GFX6-NEXT:    s_mov_b32 s2, -1
751; GFX6-NEXT:    s_mov_b32 s3, 0xf000
752; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
753; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
754; GFX6-NEXT:    s_endpgm
755  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8)
756  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
757  ret void
758}
759
; All-constant ubfe: source 127 (0x7f), offset 6, width 8. The expected output
; first materializes the packed operand 0x80006 (width 8 << 16, offset 6) with
; s_mov_b32 and then emits s_bfe_u32 with the immediate source 0x7f.
760define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
761; GFX6-LABEL: bfe_u32_constant_fold_test_8:
762; GFX6:       ; %bb.0:
763; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
764; GFX6-NEXT:    s_mov_b32 s2, 0x80006
765; GFX6-NEXT:    s_bfe_u32 s2, 0x7f, s2
766; GFX6-NEXT:    v_mov_b32_e32 v0, s2
767; GFX6-NEXT:    s_mov_b32 s2, -1
768; GFX6-NEXT:    s_mov_b32 s3, 0xf000
769; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
770; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
771; GFX6-NEXT:    s_endpgm
772  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8)
773  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
774  ret void
775}
776
; ubfe with all-constant operands: src0 = 65536 (0x10000), src1 = 16, src2 = 8.
; Expected output keeps the s_bfe_u32; its control word is
; 0x80010 = (8 << 16) | 16.
777define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
778; GFX6-LABEL: bfe_u32_constant_fold_test_9:
779; GFX6:       ; %bb.0:
780; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
781; GFX6-NEXT:    s_mov_b32 s2, 0x80010
782; GFX6-NEXT:    s_bfe_u32 s2, 0x10000, s2
783; GFX6-NEXT:    v_mov_b32_e32 v0, s2
784; GFX6-NEXT:    s_mov_b32 s2, -1
785; GFX6-NEXT:    s_mov_b32 s3, 0xf000
786; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
787; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
788; GFX6-NEXT:    s_endpgm
789  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8)
790  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
791  ret void
792}
793
; ubfe with all-constant operands: src0 = 65535 (0xffff), src1 = 16, src2 = 16.
; Expected output keeps the s_bfe_u32; its control word is
; 0x100010 = (16 << 16) | 16.
794define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
795; GFX6-LABEL: bfe_u32_constant_fold_test_10:
796; GFX6:       ; %bb.0:
797; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
798; GFX6-NEXT:    s_mov_b32 s2, 0x100010
799; GFX6-NEXT:    s_bfe_u32 s2, 0xffff, s2
800; GFX6-NEXT:    v_mov_b32_e32 v0, s2
801; GFX6-NEXT:    s_mov_b32 s2, -1
802; GFX6-NEXT:    s_mov_b32 s3, 0xf000
803; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
804; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
805; GFX6-NEXT:    s_endpgm
806  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16)
807  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
808  ret void
809}
810
; ubfe with all-constant operands: src0 = 160 (0xa0), src1 = 4, src2 = 4.
; Expected output keeps the s_bfe_u32; its control word is
; 0x40004 = (4 << 16) | 4.
811define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
812; GFX6-LABEL: bfe_u32_constant_fold_test_11:
813; GFX6:       ; %bb.0:
814; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
815; GFX6-NEXT:    s_mov_b32 s2, 0x40004
816; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
817; GFX6-NEXT:    v_mov_b32_e32 v0, s2
818; GFX6-NEXT:    s_mov_b32 s2, -1
819; GFX6-NEXT:    s_mov_b32 s3, 0xf000
820; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
821; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
822; GFX6-NEXT:    s_endpgm
823  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4)
824  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
825  ret void
826}
827
; ubfe with all-constant operands: src0 = 160 (0xa0), src1 = 31, src2 = 1.
; Expected output keeps the s_bfe_u32; its control word is
; 0x1001f = (1 << 16) | 31.
828define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
829; GFX6-LABEL: bfe_u32_constant_fold_test_12:
830; GFX6:       ; %bb.0:
831; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
832; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
833; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
834; GFX6-NEXT:    v_mov_b32_e32 v0, s2
835; GFX6-NEXT:    s_mov_b32 s2, -1
836; GFX6-NEXT:    s_mov_b32 s3, 0xf000
837; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
838; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
839; GFX6-NEXT:    s_endpgm
840  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1)
841  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
842  ret void
843}
844
; ubfe with all-constant operands: src0 = 131070 (0x1fffe), src1 = 16, src2 = 16.
; Expected output keeps the s_bfe_u32; its control word is
; 0x100010 = (16 << 16) | 16.
845define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
846; GFX6-LABEL: bfe_u32_constant_fold_test_13:
847; GFX6:       ; %bb.0:
848; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
849; GFX6-NEXT:    s_mov_b32 s2, 0x100010
850; GFX6-NEXT:    s_bfe_u32 s2, 0x1fffe, s2
851; GFX6-NEXT:    v_mov_b32_e32 v0, s2
852; GFX6-NEXT:    s_mov_b32 s2, -1
853; GFX6-NEXT:    s_mov_b32 s3, 0xf000
854; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
855; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
856; GFX6-NEXT:    s_endpgm
857  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16)
858  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
859  ret void
860}
861
; ubfe with all-constant operands: src0 = 160 (0xa0), src1 = 2, src2 = 30.
; Expected output keeps the s_bfe_u32; its control word is
; 0x1e0002 = (30 << 16) | 2.
862define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
863; GFX6-LABEL: bfe_u32_constant_fold_test_14:
864; GFX6:       ; %bb.0:
865; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
866; GFX6-NEXT:    s_mov_b32 s2, 0x1e0002
867; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
868; GFX6-NEXT:    v_mov_b32_e32 v0, s2
869; GFX6-NEXT:    s_mov_b32 s2, -1
870; GFX6-NEXT:    s_mov_b32 s3, 0xf000
871; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
872; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
873; GFX6-NEXT:    s_endpgm
874  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30)
875  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
876  ret void
877}
878
; ubfe with all-constant operands: src0 = 160 (0xa0), src1 = 4, src2 = 28.
; Expected output keeps the s_bfe_u32; its control word is
; 0x1c0004 = (28 << 16) | 4.
879define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
880; GFX6-LABEL: bfe_u32_constant_fold_test_15:
881; GFX6:       ; %bb.0:
882; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
883; GFX6-NEXT:    s_mov_b32 s2, 0x1c0004
884; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
885; GFX6-NEXT:    v_mov_b32_e32 v0, s2
886; GFX6-NEXT:    s_mov_b32 s2, -1
887; GFX6-NEXT:    s_mov_b32 s3, 0xf000
888; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
889; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
890; GFX6-NEXT:    s_endpgm
891  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28)
892  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
893  ret void
894}
895
; ubfe with all-constant operands: src0 = 4294967295 (printed as -1 in the
; expected assembly), src1 = 1, src2 = 7.
; Unlike the neighbouring tests, the control word 0x70001 = (7 << 16) | 1 is
; expected directly as an operand of s_bfe_u32 instead of being loaded with a
; separate s_mov_b32, and the result lands in s3 before s3 is reused for the
; buffer descriptor.
; NOTE(review): presumably this is because -1 is encodable without a literal,
; leaving the literal slot free for the control word -- confirm against the ISA.
896define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
897; GFX6-LABEL: bfe_u32_constant_fold_test_16:
898; GFX6:       ; %bb.0:
899; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
900; GFX6-NEXT:    s_bfe_u32 s3, -1, 0x70001
901; GFX6-NEXT:    v_mov_b32_e32 v0, s3
902; GFX6-NEXT:    s_mov_b32 s2, -1
903; GFX6-NEXT:    s_mov_b32 s3, 0xf000
904; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
905; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
906; GFX6-NEXT:    s_endpgm
907  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7)
908  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
909  ret void
910}
; ubfe with all-constant operands: src0 = 255 (0xff), src1 = 1, src2 = 31.
; Expected output keeps the s_bfe_u32; its control word is
; 0x1f0001 = (31 << 16) | 1.
912define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
913; GFX6-LABEL: bfe_u32_constant_fold_test_17:
914; GFX6:       ; %bb.0:
915; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
916; GFX6-NEXT:    s_mov_b32 s2, 0x1f0001
917; GFX6-NEXT:    s_bfe_u32 s2, 0xff, s2
918; GFX6-NEXT:    v_mov_b32_e32 v0, s2
919; GFX6-NEXT:    s_mov_b32 s2, -1
920; GFX6-NEXT:    s_mov_b32 s3, 0xf000
921; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
922; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
923; GFX6-NEXT:    s_endpgm
924  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31)
925  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
926  ret void
927}
928
; ubfe with all-constant operands: src0 = 255 (0xff), src1 = 31, src2 = 1.
; Expected output keeps the s_bfe_u32; its control word is
; 0x1001f = (1 << 16) | 31.
929define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
930; GFX6-LABEL: bfe_u32_constant_fold_test_18:
931; GFX6:       ; %bb.0:
932; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
933; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
934; GFX6-NEXT:    s_bfe_u32 s2, 0xff, s2
935; GFX6-NEXT:    v_mov_b32_e32 v0, s2
936; GFX6-NEXT:    s_mov_b32 s2, -1
937; GFX6-NEXT:    s_mov_b32 s3, 0xf000
938; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
939; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
940; GFX6-NEXT:    s_endpgm
941  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1)
942  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
943  ret void
944}
945
946; Make sure that SimplifyDemandedBits doesn't cause the 'and' mask to be
947; reduced to only the bits demanded by the bfe.
948
949; XXX: The operand to the bfe (s_bfe_u32 in the checks below) could also just directly be the load register.
; Loads an i32 from %in, masks it with 63, and uses the masked value twice:
; as src0 of ubfe(%and, 2, 2), stored to %out0, and directly, stored to %out1.
; The expected output keeps the full s_and_b32 with 63 and feeds its result to
; s_bfe_u32 with control word 0x20002 = (2 << 16) | 2.
; The parameter list is split: the autogenerated check lines sit between the
; first parameter and the remaining two.
950define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
951; GFX6-LABEL: simplify_bfe_u32_multi_use_arg:
952; GFX6:       ; %bb.0:
953; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
954; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xb
955; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
956; GFX6-NEXT:    s_mov_b32 s6, -1
957; GFX6-NEXT:    s_mov_b32 s7, 0xf000
958; GFX6-NEXT:    s_mov_b64 s[10:11], s[6:7]
959; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
960; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
961; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
962; GFX6-NEXT:    s_and_b32 s0, s0, 63
963; GFX6-NEXT:    s_bfe_u32 s1, s0, 0x20002
964; GFX6-NEXT:    v_mov_b32_e32 v1, s1
965; GFX6-NEXT:    v_mov_b32_e32 v0, s0
966; GFX6-NEXT:    buffer_store_dword v1, off, s[4:7], 0
967; GFX6-NEXT:    buffer_store_dword v0, off, s[8:11], 0
968; GFX6-NEXT:    s_endpgm
969                                            i32 addrspace(1)* %out1,
970                                            i32 addrspace(1)* %in) #0 {
971  %src = load i32, i32 addrspace(1)* %in, align 4
972  %and = and i32 %src, 63
973  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2)
974  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
975  store i32 %and, i32 addrspace(1)* %out1, align 4
976  ret void
977}
978
; Plain IR bitfield pattern (no intrinsic): (%a >> 6) & 7, stored to %out.
; The expected output keeps the two separate scalar instructions
; (s_lshr_b32 by 6, then s_and_b32 with 7); no bfe instruction is expected.
979define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 {
980; GFX6-LABEL: lshr_and:
981; GFX6:       ; %bb.0:
982; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
983; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
984; GFX6-NEXT:    s_mov_b32 s6, -1
985; GFX6-NEXT:    s_mov_b32 s7, 0xf000
986; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
987; GFX6-NEXT:    s_lshr_b32 s0, s0, 6
988; GFX6-NEXT:    s_and_b32 s0, s0, 7
989; GFX6-NEXT:    v_mov_b32_e32 v0, s0
990; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
991; GFX6-NEXT:    s_endpgm
992  %b = lshr i32 %a, 6
993  %c = and i32 %b, 7
994  store i32 %c, i32 addrspace(1)* %out, align 8
995  ret void
996}
997
; Same shape as lshr_and but with a variable shift amount: (%a >> %b) & 7.
; Both %a and %b are kernel arguments, and despite the "v_" in the name the
; expected output uses scalar instructions (s_lshr_b32 followed by
; s_and_b32 with 7); no bfe instruction is expected.
998define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
999; GFX6-LABEL: v_lshr_and:
1000; GFX6:       ; %bb.0:
1001; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
1002; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
1003; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
1004; GFX6-NEXT:    s_mov_b32 s6, -1
1005; GFX6-NEXT:    s_mov_b32 s7, 0xf000
1006; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1007; GFX6-NEXT:    s_lshr_b32 s0, s2, s0
1008; GFX6-NEXT:    s_and_b32 s0, s0, 7
1009; GFX6-NEXT:    v_mov_b32_e32 v0, s0
1010; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1011; GFX6-NEXT:    s_endpgm
1012  %c = lshr i32 %a, %b
1013  %d = and i32 %c, 7
1014  store i32 %d, i32 addrspace(1)* %out, align 8
1015  ret void
1016}
1017
; Mask-then-shift form of the bitfield pattern: (%a & 448) >> 6, where
; 448 = 0x1c0 (bits 6..8). The expected output keeps s_and_b32 with 0x1c0
; followed by s_lshr_b32 by 6; no bfe instruction is expected.
1018define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
1019; GFX6-LABEL: and_lshr:
1020; GFX6:       ; %bb.0:
1021; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
1022; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
1023; GFX6-NEXT:    s_mov_b32 s6, -1
1024; GFX6-NEXT:    s_mov_b32 s7, 0xf000
1025; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1026; GFX6-NEXT:    s_and_b32 s0, s0, 0x1c0
1027; GFX6-NEXT:    s_lshr_b32 s0, s0, 6
1028; GFX6-NEXT:    v_mov_b32_e32 v0, s0
1029; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1030; GFX6-NEXT:    s_endpgm
1031  %b = and i32 %a, 448
1032  %c = lshr i32 %b, 6
1033  store i32 %c, i32 addrspace(1)* %out, align 8
1034  ret void
1035}
1036
; Variant of and_lshr whose mask also covers the bits that get shifted out:
; (%a & 511) >> 6, where 511 = 0x1ff (bits 0..8). The expected output keeps
; s_and_b32 with 0x1ff followed by s_lshr_b32 by 6; no bfe is expected.
1037define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 {
1038; GFX6-LABEL: and_lshr2:
1039; GFX6:       ; %bb.0:
1040; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
1041; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
1042; GFX6-NEXT:    s_mov_b32 s6, -1
1043; GFX6-NEXT:    s_mov_b32 s7, 0xf000
1044; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1045; GFX6-NEXT:    s_and_b32 s0, s0, 0x1ff
1046; GFX6-NEXT:    s_lshr_b32 s0, s0, 6
1047; GFX6-NEXT:    v_mov_b32_e32 v0, s0
1048; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1049; GFX6-NEXT:    s_endpgm
1050  %b = and i32 %a, 511
1051  %c = lshr i32 %b, 6
1052  store i32 %c, i32 addrspace(1)* %out, align 8
1053  ret void
1054}
1055
; Shift-pair form of the bitfield pattern: (%a << 9) >> 11 (logical shift
; right), stored to %out. The expected output keeps s_lshl_b32 by 9 followed
; by s_lshr_b32 by 11; no bfe instruction is expected.
1056define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
1057; GFX6-LABEL: shl_lshr:
1058; GFX6:       ; %bb.0:
1059; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
1060; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
1061; GFX6-NEXT:    s_mov_b32 s6, -1
1062; GFX6-NEXT:    s_mov_b32 s7, 0xf000
1063; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1064; GFX6-NEXT:    s_lshl_b32 s0, s0, 9
1065; GFX6-NEXT:    s_lshr_b32 s0, s0, 11
1066; GFX6-NEXT:    v_mov_b32_e32 v0, s0
1067; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1068; GFX6-NEXT:    s_endpgm
1069  %b = shl i32 %a, 9
1070  %c = lshr i32 %b, 11
1071  store i32 %c, i32 addrspace(1)* %out, align 8
1072  ret void
1073}
1074
; Intrinsics exercised by the tests in this file, in 32-bit and 64-bit source
; widths. Both take (src0, src1, src2) with i32 src1/src2; in the scalar
; lowering shown by the checks, src1 and src2 are packed into one control word
; as (src2 << 16) | src1.
; NOTE(review): src1/src2 are presumably the bit offset and field width --
; confirm against the intrinsic definition.
1075declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1
1076declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) #1
1077
; Attribute group #0 is attached to the test functions, #1 to the intrinsic
; declarations above.
1078attributes #0 = { nounwind }
1079attributes #1 = { nounwind readnone }
1080