1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -amdgpu-load-store-vectorizer=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
3
; ubfe with all three operands taken directly from (non-inreg) function
; arguments. The checks expect a single v_bfe_u32 consuming v0, v1, v2 as-is.
; Note: despite "i32" in the name, this calls the unsigned ubfe intrinsic.
4define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) #0 {
5; GFX6-LABEL: v_bfe_i32_arg_arg_arg:
6; GFX6:       ; %bb.0:
7; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8; GFX6-NEXT:    v_bfe_u32 v0, v0, v1, v2
9; GFX6-NEXT:    s_setpc_b64 s[30:31]
10  %bfe_i32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
11  ret i32 %bfe_i32
12}
13
; ubfe with all three operands as inreg arguments of an amdgpu_ps function.
; The checks expect the second operand masked with 63 and the third operand
; shifted left by 16, ORed into one register that feeds s_bfe_u32.
14define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
15; GFX6-LABEL: s_bfe_i32_arg_arg_arg:
16; GFX6:       ; %bb.0:
17; GFX6-NEXT:    s_and_b32 s1, s1, 63
18; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
19; GFX6-NEXT:    s_or_b32 s1, s1, s2
20; GFX6-NEXT:    s_bfe_u32 s0, s0, s1
21; GFX6-NEXT:    ; return to shader part epilog
22  %bfe_i32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
23  ret i32 %bfe_i32
24}
25
26; TODO: Need to expand this.
27; define i64 @v_bfe_i64_arg_arg_arg(i64 %src0, i32 %src1, i32 %src2) #0 {
28;   %bfe_i64 = call i64 @llvm.amdgcn.ubfe.i64(i64 %src0, i32 %src1, i32 %src2)
29;   ret i64 %bfe_i64
30; }
31
; 64-bit ubfe with inreg arguments in an amdgpu_ps function. The second and
; third operands are packed the same way as the 32-bit scalar case
; ((%src1 & 63) | (%src2 << 16)) and the extract is selected as s_bfe_u64.
; Note: the result value is named %bfe_i32 although its type is i64.
32define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
33; GFX6-LABEL: s_bfe_i64_arg_arg_arg:
34; GFX6:       ; %bb.0:
35; GFX6-NEXT:    s_and_b32 s2, s2, 63
36; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
37; GFX6-NEXT:    s_or_b32 s2, s2, s3
38; GFX6-NEXT:    s_bfe_u64 s[0:1], s[0:1], s2
39; GFX6-NEXT:    ; return to shader part epilog
40  %bfe_i32 = call i64 @llvm.amdgcn.ubfe.i64(i64 %src0, i32 %src1, i32 %src2)
41  ret i64 %bfe_i32
42}
43
; Kernel variant: ubfe operands come from kernel arguments and the result is
; stored to %out.
; NOTE(review): the call passes %src1 as BOTH the second and third operand;
; %src2 is never used. The checks reflect this (the same loaded register s0
; feeds both the s_and_b32 and the s_lshl_b32). This looks unintended given
; the function name -- confirm, and regenerate the checks if the third
; operand is changed to %src2.
44define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 {
45; GFX6-LABEL: bfe_u32_arg_arg_arg:
46; GFX6:       ; %bb.0:
47; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
48; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
49; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
50; GFX6-NEXT:    s_mov_b32 s6, -1
51; GFX6-NEXT:    s_mov_b32 s7, 0xf000
52; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
53; GFX6-NEXT:    s_and_b32 s1, s0, 63
54; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
55; GFX6-NEXT:    s_or_b32 s0, s1, s0
56; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
57; GFX6-NEXT:    v_mov_b32_e32 v0, s0
58; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
59; GFX6-NEXT:    s_endpgm
60  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src1)
61  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
62  ret void
63}
64
; ubfe with variable source and second operand, and an immediate third
; operand (123). The checks expect the immediate to appear pre-shifted as
; 0x7b0000 (123 << 16) ORed with the second operand masked by 63.
65define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
66; GFX6-LABEL: bfe_u32_arg_arg_imm:
67; GFX6:       ; %bb.0:
68; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
69; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
70; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
71; GFX6-NEXT:    s_mov_b32 s6, -1
72; GFX6-NEXT:    s_mov_b32 s7, 0xf000
73; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
74; GFX6-NEXT:    s_and_b32 s0, s0, 63
75; GFX6-NEXT:    s_or_b32 s0, s0, 0x7b0000
76; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
77; GFX6-NEXT:    v_mov_b32_e32 v0, s0
78; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
79; GFX6-NEXT:    s_endpgm
80  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123)
81  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
82  ret void
83}
84
; ubfe with an immediate second operand (123) and variable source and third
; operand. The checks expect the immediate already masked to 59 (123 & 63)
; and ORed with the third operand shifted left by 16.
85define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
86; GFX6-LABEL: bfe_u32_arg_imm_arg:
87; GFX6:       ; %bb.0:
88; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
89; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
90; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
91; GFX6-NEXT:    s_mov_b32 s6, -1
92; GFX6-NEXT:    s_mov_b32 s7, 0xf000
93; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
94; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
95; GFX6-NEXT:    s_or_b32 s0, 59, s0
96; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
97; GFX6-NEXT:    v_mov_b32_e32 v0, s0
98; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
99; GFX6-NEXT:    s_endpgm
100  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2)
101  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
102  ret void
103}
104
; ubfe with an immediate source value (123) and variable second and third
; operands. The checks expect the source to appear as the literal 0x7b on
; s_bfe_u32, with the other two operands packed as (%src1 & 63) | (%src2 << 16).
105define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
106; GFX6-LABEL: bfe_u32_imm_arg_arg:
107; GFX6:       ; %bb.0:
108; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
109; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
110; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
111; GFX6-NEXT:    s_mov_b32 s6, -1
112; GFX6-NEXT:    s_mov_b32 s7, 0xf000
113; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
114; GFX6-NEXT:    s_and_b32 s1, s2, 63
115; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
116; GFX6-NEXT:    s_or_b32 s0, s1, s0
117; GFX6-NEXT:    s_bfe_u32 s0, 0x7b, s0
118; GFX6-NEXT:    v_mov_b32_e32 v0, s0
119; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
120; GFX6-NEXT:    s_endpgm
121  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2)
122  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
123  ret void
124}
125
; Zero width (third operand is 0) with a variable offset (second operand).
; The checks show the extract is still emitted: only the mask of the offset
; with 63 remains (no shift/or for the width) before s_bfe_u32.
126define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
127; GFX6-LABEL: bfe_u32_arg_0_width_reg_offset:
128; GFX6:       ; %bb.0:
129; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
130; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
131; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
132; GFX6-NEXT:    s_mov_b32 s6, -1
133; GFX6-NEXT:    s_mov_b32 s7, 0xf000
134; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
135; GFX6-NEXT:    s_and_b32 s0, s0, 63
136; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
137; GFX6-NEXT:    v_mov_b32_e32 v0, s0
138; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
139; GFX6-NEXT:    s_endpgm
140  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0)
141  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
142  ret void
143}
144
; Zero width (third operand is 0) with an immediate offset of 8. The checks
; show s_bfe_u32 is still emitted with the packed operand 8. The %src1
; kernel argument is unused by the body, and only %src0 is loaded.
145define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
146; GFX6-LABEL: bfe_u32_arg_0_width_imm_offset:
147; GFX6:       ; %bb.0:
148; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
149; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
150; GFX6-NEXT:    s_mov_b32 s6, -1
151; GFX6-NEXT:    s_mov_b32 s7, 0xf000
152; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
153; GFX6-NEXT:    s_bfe_u32 s0, s0, 8
154; GFX6-NEXT:    v_mov_b32_e32 v0, s0
155; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
156; GFX6-NEXT:    s_endpgm
157  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0)
158  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
159  ret void
160}
161
; ubfe (offset 0, width 8) applied to a zero-extended i8 load. The checks
; show the load selected as buffer_load_ubyte and the extract still emitted
; as v_bfe_u32 v0, v0, 0, 8 on the loaded value.
162define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
163; GFX6-LABEL: bfe_u32_zextload_i8:
164; GFX6:       ; %bb.0:
165; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
166; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
167; GFX6-NEXT:    s_mov_b32 s2, -1
168; GFX6-NEXT:    s_mov_b32 s3, 0xf000
169; GFX6-NEXT:    s_mov_b64 s[6:7], s[2:3]
170; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
171; GFX6-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
172; GFX6-NEXT:    s_waitcnt vmcnt(0)
173; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 8
174; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
175; GFX6-NEXT:    s_endpgm
176  %load = load i8, i8 addrspace(1)* %in
177  %ext = zext i8 %load to i32
178  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
179  store i32 %bfe, i32 addrspace(1)* %out, align 4
180  ret void
181}
182
183; The add should be selected as s_add_i32, which the checks below confirm.
; ubfe (offset 0, width 8) of a value already masked to 8 bits (and 255)
; after an add. The checks show both the s_and_b32 with 0xff and the
; s_bfe_u32 with packed operand 0x80000 (width 8 << 16 | offset 0) are kept.
184define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
185; GFX6-LABEL: bfe_u32_zext_in_reg_i8:
186; GFX6:       ; %bb.0:
187; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
188; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
189; GFX6-NEXT:    s_mov_b32 s6, -1
190; GFX6-NEXT:    s_mov_b32 s7, 0xf000
191; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
192; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
193; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
194; GFX6-NEXT:    s_add_i32 s0, s0, 1
195; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
196; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80000
197; GFX6-NEXT:    v_mov_b32_e32 v0, s0
198; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
199; GFX6-NEXT:    s_endpgm
200  %load = load i32, i32 addrspace(1)* %in, align 4
201  %add = add i32 %load, 1
202  %ext = and i32 %add, 255
203  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
204  store i32 %bfe, i32 addrspace(1)* %out, align 4
205  ret void
206}
207
; ubfe (offset 0, width 16) of a value already masked to 16 bits (and 65535)
; after an add. The checks show both the s_and_b32 with 0xffff and the
; s_bfe_u32 with packed operand 0x100000 (width 16 << 16 | offset 0) are kept.
208define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
209; GFX6-LABEL: bfe_u32_zext_in_reg_i16:
210; GFX6:       ; %bb.0:
211; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
212; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
213; GFX6-NEXT:    s_mov_b32 s6, -1
214; GFX6-NEXT:    s_mov_b32 s7, 0xf000
215; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
216; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
217; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
218; GFX6-NEXT:    s_add_i32 s0, s0, 1
219; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
220; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
221; GFX6-NEXT:    v_mov_b32_e32 v0, s0
222; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
223; GFX6-NEXT:    s_endpgm
224  %load = load i32, i32 addrspace(1)* %in, align 4
225  %add = add i32 %load, 1
226  %ext = and i32 %add, 65535
227  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16)
228  store i32 %bfe, i32 addrspace(1)* %out, align 4
229  ret void
230}
231
; ubfe (offset 1, width 8) of a value masked to 8 bits (and 255) after an
; add. The checks expect the mask to remain and the extract to use the
; packed operand 0x80001 (width 8 << 16 | offset 1).
232define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
233; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
234; GFX6:       ; %bb.0:
235; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
236; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
237; GFX6-NEXT:    s_mov_b32 s6, -1
238; GFX6-NEXT:    s_mov_b32 s7, 0xf000
239; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
240; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
241; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
242; GFX6-NEXT:    s_add_i32 s0, s0, 1
243; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
244; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80001
245; GFX6-NEXT:    v_mov_b32_e32 v0, s0
246; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
247; GFX6-NEXT:    s_endpgm
248  %load = load i32, i32 addrspace(1)* %in, align 4
249  %add = add i32 %load, 1
250  %ext = and i32 %add, 255
251  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8)
252  store i32 %bfe, i32 addrspace(1)* %out, align 4
253  ret void
254}
255
; ubfe (offset 3, width 8) of a value masked to 8 bits (and 255) after an
; add. The checks expect the mask to remain and the extract to use the
; packed operand 0x80003 (width 8 << 16 | offset 3).
256define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
257; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
258; GFX6:       ; %bb.0:
259; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
260; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
261; GFX6-NEXT:    s_mov_b32 s6, -1
262; GFX6-NEXT:    s_mov_b32 s7, 0xf000
263; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
264; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
265; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
266; GFX6-NEXT:    s_add_i32 s0, s0, 1
267; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
268; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80003
269; GFX6-NEXT:    v_mov_b32_e32 v0, s0
270; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
271; GFX6-NEXT:    s_endpgm
272  %load = load i32, i32 addrspace(1)* %in, align 4
273  %add = add i32 %load, 1
274  %ext = and i32 %add, 255
275  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8)
276  store i32 %bfe, i32 addrspace(1)* %out, align 4
277  ret void
278}
279
; ubfe (offset 7, width 8) of a value masked to 8 bits (and 255) after an
; add. The checks expect the mask to remain and the extract to use the
; packed operand 0x80007 (width 8 << 16 | offset 7).
280define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
281; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
282; GFX6:       ; %bb.0:
283; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
284; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
285; GFX6-NEXT:    s_mov_b32 s6, -1
286; GFX6-NEXT:    s_mov_b32 s7, 0xf000
287; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
288; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
289; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
290; GFX6-NEXT:    s_add_i32 s0, s0, 1
291; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
292; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80007
293; GFX6-NEXT:    v_mov_b32_e32 v0, s0
294; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
295; GFX6-NEXT:    s_endpgm
296  %load = load i32, i32 addrspace(1)* %in, align 4
297  %add = add i32 %load, 1
298  %ext = and i32 %add, 255
299  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8)
300  store i32 %bfe, i32 addrspace(1)* %out, align 4
301  ret void
302}
303
; ubfe (offset 8, width 8) of a value masked to 16 bits (and 65535) after an
; add. The checks expect the 0xffff mask to remain and the extract to use
; the packed operand 0x80008 (width 8 << 16 | offset 8).
304define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
305; GFX6-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
306; GFX6:       ; %bb.0:
307; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
308; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
309; GFX6-NEXT:    s_mov_b32 s6, -1
310; GFX6-NEXT:    s_mov_b32 s7, 0xf000
311; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
312; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
313; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
314; GFX6-NEXT:    s_add_i32 s0, s0, 1
315; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
316; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80008
317; GFX6-NEXT:    v_mov_b32_e32 v0, s0
318; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
319; GFX6-NEXT:    s_endpgm
320  %load = load i32, i32 addrspace(1)* %in, align 4
321  %add = add i32 %load, 1
322  %ext = and i32 %add, 65535
323  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8)
324  store i32 %bfe, i32 addrspace(1)* %out, align 4
325  ret void
326}
327
; ubfe (offset 0, width 1) of a loaded value. The checks expect a scalar
; load followed by s_bfe_u32 with packed operand 0x10000.
328define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
329; GFX6-LABEL: bfe_u32_test_1:
330; GFX6:       ; %bb.0:
331; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
332; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
333; GFX6-NEXT:    s_mov_b32 s6, -1
334; GFX6-NEXT:    s_mov_b32 s7, 0xf000
335; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
336; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
337; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
338; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x10000
339; GFX6-NEXT:    v_mov_b32_e32 v0, s0
340; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
341; GFX6-NEXT:    s_endpgm
342  %x = load i32, i32 addrspace(1)* %in, align 4
343  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1)
344  store i32 %bfe, i32 addrspace(1)* %out, align 4
345  ret void
346}
347
; ubfe (offset 0, width 8) of a value shifted left by 31. The checks show
; both the s_lshl_b32 and the s_bfe_u32 (packed operand 0x80000) are emitted.
348define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
349; GFX6-LABEL: bfe_u32_test_2:
350; GFX6:       ; %bb.0:
351; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
352; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
353; GFX6-NEXT:    s_mov_b32 s6, -1
354; GFX6-NEXT:    s_mov_b32 s7, 0xf000
355; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
356; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
357; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
358; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
359; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80000
360; GFX6-NEXT:    v_mov_b32_e32 v0, s0
361; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
362; GFX6-NEXT:    s_endpgm
363  %x = load i32, i32 addrspace(1)* %in, align 4
364  %shl = shl i32 %x, 31
365  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8)
366  store i32 %bfe, i32 addrspace(1)* %out, align 4
367  ret void
368}
369
; ubfe (offset 0, width 1) of a value shifted left by 31. The checks show
; both the s_lshl_b32 and the s_bfe_u32 (packed operand 0x10000) are emitted.
370define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
371; GFX6-LABEL: bfe_u32_test_3:
372; GFX6:       ; %bb.0:
373; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
374; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
375; GFX6-NEXT:    s_mov_b32 s6, -1
376; GFX6-NEXT:    s_mov_b32 s7, 0xf000
377; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
378; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
379; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
380; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
381; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x10000
382; GFX6-NEXT:    v_mov_b32_e32 v0, s0
383; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
384; GFX6-NEXT:    s_endpgm
385  %x = load i32, i32 addrspace(1)* %in, align 4
386  %shl = shl i32 %x, 31
387  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1)
388  store i32 %bfe, i32 addrspace(1)* %out, align 4
389  ret void
390}
391
; ubfe (offset 31, width 1) of a value shifted left by 31 and then logically
; right by 31. The checks show both shifts remain, followed by s_bfe_u32
; with packed operand 0x1001f (width 1 << 16 | offset 31).
392define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
393; GFX6-LABEL: bfe_u32_test_4:
394; GFX6:       ; %bb.0:
395; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
396; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
397; GFX6-NEXT:    s_mov_b32 s6, -1
398; GFX6-NEXT:    s_mov_b32 s7, 0xf000
399; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
400; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
401; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
402; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
403; GFX6-NEXT:    s_lshr_b32 s0, s0, 31
404; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
405; GFX6-NEXT:    v_mov_b32_e32 v0, s0
406; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
407; GFX6-NEXT:    s_endpgm
408  %x = load i32, i32 addrspace(1)* %in, align 4
409  %shl = shl i32 %x, 31
410  %shr = lshr i32 %shl, 31
411  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
412  store i32 %bfe, i32 addrspace(1)* %out, align 4
413  ret void
414}
415
; ubfe (offset 0, width 1) of a value shifted left by 31 and then
; arithmetically right by 31. The checks show the shl/ashr pair selected as
; s_bfe_i32 with operand 0x10000, followed by s_bfe_u32 with the same operand.
416define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
417; GFX6-LABEL: bfe_u32_test_5:
418; GFX6:       ; %bb.0:
419; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
420; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
421; GFX6-NEXT:    s_mov_b32 s6, -1
422; GFX6-NEXT:    s_mov_b32 s7, 0xf000
423; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
424; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
425; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
426; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x10000
427; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x10000
428; GFX6-NEXT:    v_mov_b32_e32 v0, s0
429; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
430; GFX6-NEXT:    s_endpgm
431  %x = load i32, i32 addrspace(1)* %in, align 4
432  %shl = shl i32 %x, 31
433  %shr = ashr i32 %shl, 31
434  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1)
435  store i32 %bfe, i32 addrspace(1)* %out, align 4
436  ret void
437}
438
; ubfe (offset 1, width 31) of a value shifted left by 31. The checks show
; the s_lshl_b32 followed by s_bfe_u32 with packed operand 0x1f0001
; (width 31 << 16 | offset 1).
439define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
440; GFX6-LABEL: bfe_u32_test_6:
441; GFX6:       ; %bb.0:
442; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
443; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
444; GFX6-NEXT:    s_mov_b32 s6, -1
445; GFX6-NEXT:    s_mov_b32 s7, 0xf000
446; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
447; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
448; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
449; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
450; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1f0001
451; GFX6-NEXT:    v_mov_b32_e32 v0, s0
452; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
453; GFX6-NEXT:    s_endpgm
454  %x = load i32, i32 addrspace(1)* %in, align 4
455  %shl = shl i32 %x, 31
456  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31)
457  store i32 %bfe, i32 addrspace(1)* %out, align 4
458  ret void
459}
460
; ubfe (offset 0, width 31) of a value shifted left by 31. The checks show
; the s_lshl_b32 followed by s_bfe_u32 with packed operand 0x1f0000
; (width 31 << 16 | offset 0).
461define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
462; GFX6-LABEL: bfe_u32_test_7:
463; GFX6:       ; %bb.0:
464; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
465; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
466; GFX6-NEXT:    s_mov_b32 s6, -1
467; GFX6-NEXT:    s_mov_b32 s7, 0xf000
468; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
469; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
470; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
471; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
472; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1f0000
473; GFX6-NEXT:    v_mov_b32_e32 v0, s0
474; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
475; GFX6-NEXT:    s_endpgm
476  %x = load i32, i32 addrspace(1)* %in, align 4
477  %shl = shl i32 %x, 31
478  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31)
479  store i32 %bfe, i32 addrspace(1)* %out, align 4
480  ret void
481}
482
; ubfe (offset 31, width 1) of a value shifted left by 31. The checks show
; the s_lshl_b32 followed by s_bfe_u32 with packed operand 0x1001f
; (width 1 << 16 | offset 31).
483define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
484; GFX6-LABEL: bfe_u32_test_8:
485; GFX6:       ; %bb.0:
486; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
487; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
488; GFX6-NEXT:    s_mov_b32 s6, -1
489; GFX6-NEXT:    s_mov_b32 s7, 0xf000
490; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
491; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
492; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
493; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
494; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
495; GFX6-NEXT:    v_mov_b32_e32 v0, s0
496; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
497; GFX6-NEXT:    s_endpgm
498  %x = load i32, i32 addrspace(1)* %in, align 4
499  %shl = shl i32 %x, 31
500  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
501  store i32 %bfe, i32 addrspace(1)* %out, align 4
502  ret void
503}
504
; ubfe (offset 31, width 1) applied directly to a loaded value. The checks
; expect s_bfe_u32 with packed operand 0x1001f (width 1 << 16 | offset 31).
505define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
506; GFX6-LABEL: bfe_u32_test_9:
507; GFX6:       ; %bb.0:
508; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
509; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
510; GFX6-NEXT:    s_mov_b32 s6, -1
511; GFX6-NEXT:    s_mov_b32 s7, 0xf000
512; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
513; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
514; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
515; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
516; GFX6-NEXT:    v_mov_b32_e32 v0, s0
517; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
518; GFX6-NEXT:    s_endpgm
519  %x = load i32, i32 addrspace(1)* %in, align 4
520  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1)
521  store i32 %bfe, i32 addrspace(1)* %out, align 4
522  ret void
523}
524
; ubfe (offset 1, width 31) applied directly to a loaded value. The checks
; expect s_bfe_u32 with packed operand 0x1f0001 (width 31 << 16 | offset 1).
525define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
526; GFX6-LABEL: bfe_u32_test_10:
527; GFX6:       ; %bb.0:
528; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
529; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
530; GFX6-NEXT:    s_mov_b32 s6, -1
531; GFX6-NEXT:    s_mov_b32 s7, 0xf000
532; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
533; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
534; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
535; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1f0001
536; GFX6-NEXT:    v_mov_b32_e32 v0, s0
537; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
538; GFX6-NEXT:    s_endpgm
539  %x = load i32, i32 addrspace(1)* %in, align 4
540  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31)
541  store i32 %bfe, i32 addrspace(1)* %out, align 4
542  ret void
543}
544
; ubfe (offset 8, width 24) applied directly to a loaded value. The checks
; expect s_bfe_u32 with packed operand 0x180008 (width 24 << 16 | offset 8).
545define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
546; GFX6-LABEL: bfe_u32_test_11:
547; GFX6:       ; %bb.0:
548; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
549; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
550; GFX6-NEXT:    s_mov_b32 s6, -1
551; GFX6-NEXT:    s_mov_b32 s7, 0xf000
552; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
553; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
554; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
555; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x180008
556; GFX6-NEXT:    v_mov_b32_e32 v0, s0
557; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
558; GFX6-NEXT:    s_endpgm
559  %x = load i32, i32 addrspace(1)* %in, align 4
560  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24)
561  store i32 %bfe, i32 addrspace(1)* %out, align 4
562  ret void
563}
564
; ubfe (offset 24, width 8) applied directly to a loaded value. The checks
; expect s_bfe_u32 with packed operand 0x80018 (width 8 << 16 | offset 24).
565define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
566; GFX6-LABEL: bfe_u32_test_12:
567; GFX6:       ; %bb.0:
568; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
569; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
570; GFX6-NEXT:    s_mov_b32 s6, -1
571; GFX6-NEXT:    s_mov_b32 s7, 0xf000
572; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
573; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
574; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
575; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80018
576; GFX6-NEXT:    v_mov_b32_e32 v0, s0
577; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
578; GFX6-NEXT:    s_endpgm
579  %x = load i32, i32 addrspace(1)* %in, align 4
580  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8)
581  store i32 %bfe, i32 addrspace(1)* %out, align 4
582  ret void
583}
584
585; NOTE: the following pattern has no check prefix, so FileCheck does not match it: V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; ubfe (offset 31, width 1) of a value arithmetically shifted right by 31.
; The checks show the s_ashr_i32 is kept, followed by s_bfe_u32 with packed
; operand 0x1001f (width 1 << 16 | offset 31).
586define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
587; GFX6-LABEL: bfe_u32_test_13:
588; GFX6:       ; %bb.0:
589; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
590; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
591; GFX6-NEXT:    s_mov_b32 s6, -1
592; GFX6-NEXT:    s_mov_b32 s7, 0xf000
593; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
594; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
595; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
596; GFX6-NEXT:    s_ashr_i32 s0, s0, 31
597; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
598; GFX6-NEXT:    v_mov_b32_e32 v0, s0
599; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
600; GFX6-NEXT:    s_endpgm
601  %x = load i32, i32 addrspace(1)* %in, align 4
602  %ashr = ashr i32 %x, 31
603  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ashr, i32 31, i32 1)
604  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
605}
606
; ubfe (offset 31, width 1) of a value logically shifted right by 31.
; The checks show the s_lshr_b32 is kept, followed by s_bfe_u32 with packed
; operand 0x1001f (width 1 << 16 | offset 31).
607define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
608; GFX6-LABEL: bfe_u32_test_14:
609; GFX6:       ; %bb.0:
610; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
611; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
612; GFX6-NEXT:    s_mov_b32 s6, -1
613; GFX6-NEXT:    s_mov_b32 s7, 0xf000
614; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
615; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
616; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
617; GFX6-NEXT:    s_lshr_b32 s0, s0, 31
618; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
619; GFX6-NEXT:    v_mov_b32_e32 v0, s0
620; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
621; GFX6-NEXT:    s_endpgm
622  %x = load i32, i32 addrspace(1)* %in, align 4
623  %lshr = lshr i32 %x, 31
624  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %lshr, i32 31, i32 1)
625  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
626}
627
; All-constant operands: ubfe(0, offset 0, width 0). The checks show the
; extract is not folded to a constant; s_bfe_u32 s2, 0, 0 is still emitted.
628define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
629; GFX6-LABEL: bfe_u32_constant_fold_test_0:
630; GFX6:       ; %bb.0:
631; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
632; GFX6-NEXT:    s_bfe_u32 s2, 0, 0
633; GFX6-NEXT:    v_mov_b32_e32 v0, s2
634; GFX6-NEXT:    s_mov_b32 s2, -1
635; GFX6-NEXT:    s_mov_b32 s3, 0xf000
636; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
637; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
638; GFX6-NEXT:    s_endpgm
639  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
640  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
641  ret void
642}
643
; All-constant operands: ubfe(12334, offset 0, width 0). The checks show the
; extract is not folded; s_bfe_u32 is emitted with source literal 0x302e
; (12334) and packed operand 0.
644define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
645; GFX6-LABEL: bfe_u32_constant_fold_test_1:
646; GFX6:       ; %bb.0:
647; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
648; GFX6-NEXT:    s_bfe_u32 s2, 0x302e, 0
649; GFX6-NEXT:    v_mov_b32_e32 v0, s2
650; GFX6-NEXT:    s_mov_b32 s2, -1
651; GFX6-NEXT:    s_mov_b32 s3, 0xf000
652; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
653; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
654; GFX6-NEXT:    s_endpgm
655  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0)
656  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
657  ret void
658}
659
; All-constant operands: ubfe(0, offset 0, width 1). The checks show the
; extract is not folded; s_bfe_u32 is emitted with packed operand 0x10000.
660define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
661; GFX6-LABEL: bfe_u32_constant_fold_test_2:
662; GFX6:       ; %bb.0:
663; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
664; GFX6-NEXT:    s_bfe_u32 s2, 0, 0x10000
665; GFX6-NEXT:    v_mov_b32_e32 v0, s2
666; GFX6-NEXT:    s_mov_b32 s2, -1
667; GFX6-NEXT:    s_mov_b32 s3, 0xf000
668; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
669; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
670; GFX6-NEXT:    s_endpgm
671  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1)
672  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
673  ret void
674}
675
; All-constant operands: ubfe(1, offset 0, width 1). The checks show the
; extract is not folded; s_bfe_u32 is emitted with source 1 and packed
; operand 0x10000.
676define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
677; GFX6-LABEL: bfe_u32_constant_fold_test_3:
678; GFX6:       ; %bb.0:
679; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
680; GFX6-NEXT:    s_bfe_u32 s2, 1, 0x10000
681; GFX6-NEXT:    v_mov_b32_e32 v0, s2
682; GFX6-NEXT:    s_mov_b32 s2, -1
683; GFX6-NEXT:    s_mov_b32 s3, 0xf000
684; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
685; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
686; GFX6-NEXT:    s_endpgm
687  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1)
688  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
689  ret void
690}
691
; All-constant operands: ubfe(4294967295, offset 0, width 1). The checks
; show the extract is not folded; the source is printed as -1 and the
; packed operand is 0x10000.
692define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
693; GFX6-LABEL: bfe_u32_constant_fold_test_4:
694; GFX6:       ; %bb.0:
695; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
696; GFX6-NEXT:    s_bfe_u32 s2, -1, 0x10000
697; GFX6-NEXT:    v_mov_b32_e32 v0, s2
698; GFX6-NEXT:    s_mov_b32 s2, -1
699; GFX6-NEXT:    s_mov_b32 s3, 0xf000
700; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
701; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
702; GFX6-NEXT:    s_endpgm
703  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1)
704  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
705  ret void
706}
707
; All-constant operands: ubfe(128, offset 7, width 1). The checks show the
; extract is not folded; the packed operand 0x10007 is first moved into a
; register with s_mov_b32 and s_bfe_u32 takes the source literal 0x80.
708define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
709; GFX6-LABEL: bfe_u32_constant_fold_test_5:
710; GFX6:       ; %bb.0:
711; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
712; GFX6-NEXT:    s_mov_b32 s2, 0x10007
713; GFX6-NEXT:    s_bfe_u32 s2, 0x80, s2
714; GFX6-NEXT:    v_mov_b32_e32 v0, s2
715; GFX6-NEXT:    s_mov_b32 s2, -1
716; GFX6-NEXT:    s_mov_b32 s3, 0xf000
717; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
718; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
719; GFX6-NEXT:    s_endpgm
720  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1)
721  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
722  ret void
723}
724
; All-constant operands: ubfe(128, offset 0, width 8). The checks show the
; extract is not folded; the packed operand 0x80000 is first moved into a
; register with s_mov_b32 and s_bfe_u32 takes the source literal 0x80.
725define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
726; GFX6-LABEL: bfe_u32_constant_fold_test_6:
727; GFX6:       ; %bb.0:
728; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
729; GFX6-NEXT:    s_mov_b32 s2, 0x80000
730; GFX6-NEXT:    s_bfe_u32 s2, 0x80, s2
731; GFX6-NEXT:    v_mov_b32_e32 v0, s2
732; GFX6-NEXT:    s_mov_b32 s2, -1
733; GFX6-NEXT:    s_mov_b32 s3, 0xf000
734; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
735; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
736; GFX6-NEXT:    s_endpgm
737  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8)
738  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
739  ret void
740}
741
; All-constant operands: ubfe(127, offset 0, width 8). The checks show the
; extract is not folded; the packed operand 0x80000 is first moved into a
; register with s_mov_b32 and s_bfe_u32 takes the source literal 0x7f.
742define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
743; GFX6-LABEL: bfe_u32_constant_fold_test_7:
744; GFX6:       ; %bb.0:
745; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
746; GFX6-NEXT:    s_mov_b32 s2, 0x80000
747; GFX6-NEXT:    s_bfe_u32 s2, 0x7f, s2
748; GFX6-NEXT:    v_mov_b32_e32 v0, s2
749; GFX6-NEXT:    s_mov_b32 s2, -1
750; GFX6-NEXT:    s_mov_b32 s3, 0xf000
751; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
752; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
753; GFX6-NEXT:    s_endpgm
754  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8)
755  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
756  ret void
757}
758
; All-constant operands: ubfe(127, offset 6, width 8). The checks show the
; extract is not folded; the packed operand 0x80006 is first moved into a
; register with s_mov_b32 and s_bfe_u32 takes the source literal 0x7f.
759define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
760; GFX6-LABEL: bfe_u32_constant_fold_test_8:
761; GFX6:       ; %bb.0:
762; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
763; GFX6-NEXT:    s_mov_b32 s2, 0x80006
764; GFX6-NEXT:    s_bfe_u32 s2, 0x7f, s2
765; GFX6-NEXT:    v_mov_b32_e32 v0, s2
766; GFX6-NEXT:    s_mov_b32 s2, -1
767; GFX6-NEXT:    s_mov_b32 s3, 0xf000
768; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
769; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
770; GFX6-NEXT:    s_endpgm
771  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8)
772  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
773  ret void
774}
775
; ubfe called with all-constant operands (65536, 16, 8); the result is stored
; to %out. The expected output keeps an s_bfe_u32 on the literal 0x10000 with
; the control value 0x80010 = (8 << 16) | 16; no folded constant is emitted.
776define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
777; GFX6-LABEL: bfe_u32_constant_fold_test_9:
778; GFX6:       ; %bb.0:
779; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
780; GFX6-NEXT:    s_mov_b32 s2, 0x80010
781; GFX6-NEXT:    s_bfe_u32 s2, 0x10000, s2
782; GFX6-NEXT:    v_mov_b32_e32 v0, s2
783; GFX6-NEXT:    s_mov_b32 s2, -1
784; GFX6-NEXT:    s_mov_b32 s3, 0xf000
785; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
786; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
787; GFX6-NEXT:    s_endpgm
788  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8)
789  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
790  ret void
791}
792
; ubfe called with all-constant operands (65535, 16, 16); the result is stored
; to %out. The expected output keeps an s_bfe_u32 on the literal 0xffff with
; the control value 0x100010 = (16 << 16) | 16; no folded constant is emitted.
793define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
794; GFX6-LABEL: bfe_u32_constant_fold_test_10:
795; GFX6:       ; %bb.0:
796; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
797; GFX6-NEXT:    s_mov_b32 s2, 0x100010
798; GFX6-NEXT:    s_bfe_u32 s2, 0xffff, s2
799; GFX6-NEXT:    v_mov_b32_e32 v0, s2
800; GFX6-NEXT:    s_mov_b32 s2, -1
801; GFX6-NEXT:    s_mov_b32 s3, 0xf000
802; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
803; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
804; GFX6-NEXT:    s_endpgm
805  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16)
806  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
807  ret void
808}
809
; ubfe called with all-constant operands (160, 4, 4); the result is stored to
; %out. The expected output keeps an s_bfe_u32 on the literal 0xa0 with the
; control value 0x40004 = (4 << 16) | 4; no folded constant is emitted.
810define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
811; GFX6-LABEL: bfe_u32_constant_fold_test_11:
812; GFX6:       ; %bb.0:
813; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
814; GFX6-NEXT:    s_mov_b32 s2, 0x40004
815; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
816; GFX6-NEXT:    v_mov_b32_e32 v0, s2
817; GFX6-NEXT:    s_mov_b32 s2, -1
818; GFX6-NEXT:    s_mov_b32 s3, 0xf000
819; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
820; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
821; GFX6-NEXT:    s_endpgm
822  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4)
823  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
824  ret void
825}
826
; ubfe called with all-constant operands (160, 31, 1); the result is stored to
; %out. The expected output keeps an s_bfe_u32 on the literal 0xa0 with the
; control value 0x1001f = (1 << 16) | 31; no folded constant is emitted.
827define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
828; GFX6-LABEL: bfe_u32_constant_fold_test_12:
829; GFX6:       ; %bb.0:
830; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
831; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
832; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
833; GFX6-NEXT:    v_mov_b32_e32 v0, s2
834; GFX6-NEXT:    s_mov_b32 s2, -1
835; GFX6-NEXT:    s_mov_b32 s3, 0xf000
836; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
837; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
838; GFX6-NEXT:    s_endpgm
839  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1)
840  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
841  ret void
842}
843
; ubfe called with all-constant operands (131070, 16, 16); the result is stored
; to %out. The expected output keeps an s_bfe_u32 on the literal 0x1fffe with
; the control value 0x100010 = (16 << 16) | 16; no folded constant is emitted.
844define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
845; GFX6-LABEL: bfe_u32_constant_fold_test_13:
846; GFX6:       ; %bb.0:
847; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
848; GFX6-NEXT:    s_mov_b32 s2, 0x100010
849; GFX6-NEXT:    s_bfe_u32 s2, 0x1fffe, s2
850; GFX6-NEXT:    v_mov_b32_e32 v0, s2
851; GFX6-NEXT:    s_mov_b32 s2, -1
852; GFX6-NEXT:    s_mov_b32 s3, 0xf000
853; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
854; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
855; GFX6-NEXT:    s_endpgm
856  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16)
857  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
858  ret void
859}
860
; ubfe called with all-constant operands (160, 2, 30); the result is stored to
; %out. The expected output keeps an s_bfe_u32 on the literal 0xa0 with the
; control value 0x1e0002 = (30 << 16) | 2; no folded constant is emitted.
861define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
862; GFX6-LABEL: bfe_u32_constant_fold_test_14:
863; GFX6:       ; %bb.0:
864; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
865; GFX6-NEXT:    s_mov_b32 s2, 0x1e0002
866; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
867; GFX6-NEXT:    v_mov_b32_e32 v0, s2
868; GFX6-NEXT:    s_mov_b32 s2, -1
869; GFX6-NEXT:    s_mov_b32 s3, 0xf000
870; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
871; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
872; GFX6-NEXT:    s_endpgm
873  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30)
874  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
875  ret void
876}
877
; ubfe called with all-constant operands (160, 4, 28); the result is stored to
; %out. The expected output keeps an s_bfe_u32 on the literal 0xa0 with the
; control value 0x1c0004 = (28 << 16) | 4; no folded constant is emitted.
878define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
879; GFX6-LABEL: bfe_u32_constant_fold_test_15:
880; GFX6:       ; %bb.0:
881; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
882; GFX6-NEXT:    s_mov_b32 s2, 0x1c0004
883; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
884; GFX6-NEXT:    v_mov_b32_e32 v0, s2
885; GFX6-NEXT:    s_mov_b32 s2, -1
886; GFX6-NEXT:    s_mov_b32 s3, 0xf000
887; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
888; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
889; GFX6-NEXT:    s_endpgm
890  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28)
891  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
892  ret void
893}
894
; ubfe called with all-constant operands (4294967295, 1, 7); the result is
; stored to %out. Here the source appears as the operand -1 and the control
; value 0x70001 = (7 << 16) | 1 is written directly in the s_bfe_u32, so the
; expected output has no separate s_mov_b32 for it. No folded constant is
; emitted.
895define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
896; GFX6-LABEL: bfe_u32_constant_fold_test_16:
897; GFX6:       ; %bb.0:
898; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
899; GFX6-NEXT:    s_bfe_u32 s2, -1, 0x70001
900; GFX6-NEXT:    v_mov_b32_e32 v0, s2
901; GFX6-NEXT:    s_mov_b32 s2, -1
902; GFX6-NEXT:    s_mov_b32 s3, 0xf000
903; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
904; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
905; GFX6-NEXT:    s_endpgm
906  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7)
907  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
908  ret void
909}
910
; ubfe called with all-constant operands (255, 1, 31); the result is stored to
; %out. The expected output keeps an s_bfe_u32 on the literal 0xff with the
; control value 0x1f0001 = (31 << 16) | 1; no folded constant is emitted.
911define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
912; GFX6-LABEL: bfe_u32_constant_fold_test_17:
913; GFX6:       ; %bb.0:
914; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
915; GFX6-NEXT:    s_mov_b32 s2, 0x1f0001
916; GFX6-NEXT:    s_bfe_u32 s2, 0xff, s2
917; GFX6-NEXT:    v_mov_b32_e32 v0, s2
918; GFX6-NEXT:    s_mov_b32 s2, -1
919; GFX6-NEXT:    s_mov_b32 s3, 0xf000
920; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
921; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
922; GFX6-NEXT:    s_endpgm
923  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31)
924  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
925  ret void
926}
927
; ubfe called with all-constant operands (255, 31, 1); the result is stored to
; %out. The expected output keeps an s_bfe_u32 on the literal 0xff with the
; control value 0x1001f = (1 << 16) | 31; no folded constant is emitted.
928define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
929; GFX6-LABEL: bfe_u32_constant_fold_test_18:
930; GFX6:       ; %bb.0:
931; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
932; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
933; GFX6-NEXT:    s_bfe_u32 s2, 0xff, s2
934; GFX6-NEXT:    v_mov_b32_e32 v0, s2
935; GFX6-NEXT:    s_mov_b32 s2, -1
936; GFX6-NEXT:    s_mov_b32 s3, 0xf000
937; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
938; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
939; GFX6-NEXT:    s_endpgm
940  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1)
941  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
942  ret void
943}
944
945; Make sure that SimplifyDemandedBits doesn't cause the and to be
946; reduced to the bits demanded by the bfe.
947
948; XXX: The operand to v_bfe_u32 could also just directly be the load register.
; Loads a word from %in and masks it with 63. The masked value (%and) has two
; uses: it is the source of ubfe(%and, 2, 2), whose result is stored to %out0,
; and it is itself stored to %out1. The expected output keeps the full
; `s_and_b32 ..., 63` and feeds its result to an s_bfe_u32 with control value
; 0x20002 = (2 << 16) | 2.
; Note: the signature is split across the autogenerated check lines below; the
; remaining parameters follow the s_endpgm check.
949define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
950; GFX6-LABEL: simplify_bfe_u32_multi_use_arg:
951; GFX6:       ; %bb.0:
952; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
953; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xb
954; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
955; GFX6-NEXT:    s_mov_b32 s6, -1
956; GFX6-NEXT:    s_mov_b32 s7, 0xf000
957; GFX6-NEXT:    s_mov_b64 s[10:11], s[6:7]
958; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
959; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
960; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
961; GFX6-NEXT:    s_and_b32 s0, s0, 63
962; GFX6-NEXT:    s_bfe_u32 s1, s0, 0x20002
963; GFX6-NEXT:    v_mov_b32_e32 v1, s1
964; GFX6-NEXT:    v_mov_b32_e32 v0, s0
965; GFX6-NEXT:    buffer_store_dword v1, off, s[4:7], 0
966; GFX6-NEXT:    buffer_store_dword v0, off, s[8:11], 0
967; GFX6-NEXT:    s_endpgm
968                                            i32 addrspace(1)* %out1,
969                                            i32 addrspace(1)* %in) #0 {
970  %src = load i32, i32 addrspace(1)* %in, align 4
971  %and = and i32 %src, 63
972  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2)
973  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
974  store i32 %and, i32 addrspace(1)* %out1, align 4
975  ret void
976}
977
; Plain IR pattern (no intrinsic): shift %a right by the constant 6, then mask
; with 7, and store the result to %out. The expected output is a separate
; s_lshr_b32 followed by s_and_b32; no bfe instruction is emitted.
978define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 {
979; GFX6-LABEL: lshr_and:
980; GFX6:       ; %bb.0:
981; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
982; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
983; GFX6-NEXT:    s_mov_b32 s6, -1
984; GFX6-NEXT:    s_mov_b32 s7, 0xf000
985; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
986; GFX6-NEXT:    s_lshr_b32 s0, s0, 6
987; GFX6-NEXT:    s_and_b32 s0, s0, 7
988; GFX6-NEXT:    v_mov_b32_e32 v0, s0
989; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
990; GFX6-NEXT:    s_endpgm
991  %b = lshr i32 %a, 6
992  %c = and i32 %b, 7
993  store i32 %c, i32 addrspace(1)* %out, align 8
994  ret void
995}
996
; Same shift-then-mask pattern as a constant-shift test, but the shift amount
; is the kernel argument %b rather than a constant: (%a >> %b) & 7 is stored to
; %out. The expected output is s_lshr_b32 with a register shift amount followed
; by s_and_b32; no bfe instruction is emitted.
997define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
998; GFX6-LABEL: v_lshr_and:
999; GFX6:       ; %bb.0:
1000; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
1001; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
1002; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
1003; GFX6-NEXT:    s_mov_b32 s6, -1
1004; GFX6-NEXT:    s_mov_b32 s7, 0xf000
1005; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1006; GFX6-NEXT:    s_lshr_b32 s0, s2, s0
1007; GFX6-NEXT:    s_and_b32 s0, s0, 7
1008; GFX6-NEXT:    v_mov_b32_e32 v0, s0
1009; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1010; GFX6-NEXT:    s_endpgm
1011  %c = lshr i32 %a, %b
1012  %d = and i32 %c, 7
1013  store i32 %d, i32 addrspace(1)* %out, align 8
1014  ret void
1015}
1016
; Mask-then-shift pattern: %a is masked with 448 (0x1c0, i.e. bits 6..8) and
; then shifted right by 6; the result is stored to %out. The expected output is
; s_and_b32 followed by s_lshr_b32; no bfe instruction is emitted.
1017define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
1018; GFX6-LABEL: and_lshr:
1019; GFX6:       ; %bb.0:
1020; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
1021; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
1022; GFX6-NEXT:    s_mov_b32 s6, -1
1023; GFX6-NEXT:    s_mov_b32 s7, 0xf000
1024; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1025; GFX6-NEXT:    s_and_b32 s0, s0, 0x1c0
1026; GFX6-NEXT:    s_lshr_b32 s0, s0, 6
1027; GFX6-NEXT:    v_mov_b32_e32 v0, s0
1028; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1029; GFX6-NEXT:    s_endpgm
1030  %b = and i32 %a, 448
1031  %c = lshr i32 %b, 6
1032  store i32 %c, i32 addrspace(1)* %out, align 8
1033  ret void
1034}
1035
; Mask-then-shift pattern where the mask also covers bits below the shift
; amount: %a is masked with 511 (0x1ff, i.e. bits 0..8) and then shifted right
; by 6; the result is stored to %out. The expected output is s_and_b32 followed
; by s_lshr_b32; no bfe instruction is emitted.
1036define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 {
1037; GFX6-LABEL: and_lshr2:
1038; GFX6:       ; %bb.0:
1039; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
1040; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
1041; GFX6-NEXT:    s_mov_b32 s6, -1
1042; GFX6-NEXT:    s_mov_b32 s7, 0xf000
1043; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1044; GFX6-NEXT:    s_and_b32 s0, s0, 0x1ff
1045; GFX6-NEXT:    s_lshr_b32 s0, s0, 6
1046; GFX6-NEXT:    v_mov_b32_e32 v0, s0
1047; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1048; GFX6-NEXT:    s_endpgm
1049  %b = and i32 %a, 511
1050  %c = lshr i32 %b, 6
1051  store i32 %c, i32 addrspace(1)* %out, align 8
1052  ret void
1053}
1054
; Shift-left-then-shift-right pattern: %a is shifted left by 9 and the result
; is logically shifted right by 11, then stored to %out. The expected output is
; s_lshl_b32 followed by s_lshr_b32; no bfe instruction is emitted.
1055define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
1056; GFX6-LABEL: shl_lshr:
1057; GFX6:       ; %bb.0:
1058; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
1059; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
1060; GFX6-NEXT:    s_mov_b32 s6, -1
1061; GFX6-NEXT:    s_mov_b32 s7, 0xf000
1062; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1063; GFX6-NEXT:    s_lshl_b32 s0, s0, 9
1064; GFX6-NEXT:    s_lshr_b32 s0, s0, 11
1065; GFX6-NEXT:    v_mov_b32_e32 v0, s0
1066; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1067; GFX6-NEXT:    s_endpgm
1068  %b = shl i32 %a, 9
1069  %c = lshr i32 %b, 11
1070  store i32 %c, i32 addrspace(1)* %out, align 8
1071  ret void
1072}
1073
; Declarations of the 32-bit and 64-bit ubfe intrinsics called by the tests in
; this file. Both take the source value followed by two i32 operands.
1074declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1
1075declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) #1
1076
; Attribute group #0 is attached to the test functions; #1 is attached to the
; intrinsic declarations.
1077attributes #0 = { nounwind }
1078attributes #1 = { nounwind readnone }
1079