1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
3; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
4
; Intrinsics referenced by the tests in this file. Every declaration carries
; attribute group #1.

; Scalar bit reversal.
declare i16 @llvm.bitreverse.i16(i16) #1
declare i32 @llvm.bitreverse.i32(i32) #1
declare i64 @llvm.bitreverse.i64(i64) #1

; Vector bit reversal with 32-bit elements.
declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>) #1
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) #1

; Vector bit reversal with 64-bit elements.
declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) #1
declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>) #1

; Work-item id; several v_* tests use it as a getelementptr index.
declare i32 @llvm.amdgcn.workitem.id.x() #1
16
; i16 bitreverse of a value passed directly as a kernel argument; the result
; is stored through %out. The check line expects an s_brev_b32 in the output.
; FUNC-LABEL: {{^}}s_brev_i16:
; SI: s_brev_b32
define amdgpu_kernel void @s_brev_i16(i16 addrspace(1)* noalias %out, i16 %val) #0 {
  %reversed = call i16 @llvm.bitreverse.i16(i16 %val) #1
  store i16 %reversed, i16 addrspace(1)* %out
  ret void
}
24
; i16 bitreverse of a value loaded from %valptr (no per-work-item indexing in
; this variant); the result is stored through %out. The check line expects a
; v_bfrev_b32_e32 in the output.
; FUNC-LABEL: {{^}}v_brev_i16:
; SI: v_bfrev_b32_e32
define amdgpu_kernel void @v_brev_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) #0 {
  %loaded = load i16, i16 addrspace(1)* %valptr
  %reversed = call i16 @llvm.bitreverse.i16(i16 %loaded) #1
  store i16 %reversed, i16 addrspace(1)* %out
  ret void
}
33
; i32 bitreverse of a kernel argument. The checks follow the whole chain:
; the argument is loaded into an SGPR, reversed with s_brev_b32, copied to a
; VGPR and written out with a buffer store.
; FUNC-LABEL: {{^}}s_brev_i32:
; SI: s_load_dword [[VAL:s[0-9]+]],
; SI: s_brev_b32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
; SI: s_endpgm
define amdgpu_kernel void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) #0 {
  %reversed = call i32 @llvm.bitreverse.i32(i32 %val) #1
  store i32 %reversed, i32 addrspace(1)* %out
  ret void
}
45
; i32 bitreverse of an element of %valptr selected by the work-item id. The
; checks expect the loaded VGPR to feed v_bfrev_b32_e32 and its result to be
; stored with a buffer store.
; FUNC-LABEL: {{^}}v_brev_i32:
; SI: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
; SI: v_bfrev_b32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define amdgpu_kernel void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %elt.ptr = getelementptr i32, i32 addrspace(1)* %valptr, i32 %lane
  %loaded = load i32, i32 addrspace(1)* %elt.ptr
  %reversed = call i32 @llvm.bitreverse.i32(i32 %loaded) #1
  store i32 %reversed, i32 addrspace(1)* %out
  ret void
}
59
; <2 x i32> bitreverse of a kernel argument; the checks expect two s_brev_b32
; instructions in the output.
; FUNC-LABEL: {{^}}s_brev_v2i32:
; SI: s_brev_b32
; SI: s_brev_b32
define amdgpu_kernel void @s_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> %val) #0 {
  %reversed = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1
  store <2 x i32> %reversed, <2 x i32> addrspace(1)* %out
  ret void
}
68
; <2 x i32> bitreverse of an element of %valptr selected by the work-item id;
; the checks expect two v_bfrev_b32_e32 instructions in the output.
; FUNC-LABEL: {{^}}v_brev_v2i32:
; SI: v_bfrev_b32_e32
; SI: v_bfrev_b32_e32
define amdgpu_kernel void @v_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %elt.ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %lane
  %loaded = load <2 x i32>, <2 x i32> addrspace(1)* %elt.ptr
  %reversed = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %loaded) #1
  store <2 x i32> %reversed, <2 x i32> addrspace(1)* %out
  ret void
}
80
; i64 bitreverse of a kernel argument.
; NOTE(review): only the function label is checked here; no instruction
; pattern is verified for this case.
; FUNC-LABEL: {{^}}s_brev_i64:
define amdgpu_kernel void @s_brev_i64(i64 addrspace(1)* noalias %out, i64 %val) #0 {
  %reversed = call i64 @llvm.bitreverse.i64(i64 %val) #1
  store i64 %reversed, i64 addrspace(1)* %out
  ret void
}
87
; i64 bitreverse of an element of %valptr selected by the work-item id. The
; only instruction-level check is negative: a v_or_b32_e64 whose two source
; operands are both the literal 0 must not appear in the output.
; FUNC-LABEL: {{^}}v_brev_i64:
; SI-NOT: v_or_b32_e64 v{{[0-9]+}}, 0, 0
define amdgpu_kernel void @v_brev_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %valptr) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %elt.ptr = getelementptr i64, i64 addrspace(1)* %valptr, i32 %lane
  %loaded = load i64, i64 addrspace(1)* %elt.ptr
  %reversed = call i64 @llvm.bitreverse.i64(i64 %loaded) #1
  store i64 %reversed, i64 addrspace(1)* %out
  ret void
}
98
; <2 x i64> bitreverse of a kernel argument.
; NOTE(review): only the function label is checked here; no instruction
; pattern is verified for this case.
; FUNC-LABEL: {{^}}s_brev_v2i64:
define amdgpu_kernel void @s_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %val) #0 {
  %reversed = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1
  store <2 x i64> %reversed, <2 x i64> addrspace(1)* %out
  ret void
}
105
; <2 x i64> bitreverse of an element of %valptr selected by the work-item id.
; NOTE(review): only the function label is checked here; no instruction
; pattern is verified for this case.
; FUNC-LABEL: {{^}}v_brev_v2i64:
define amdgpu_kernel void @v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %valptr) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %elt.ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %valptr, i32 %lane
  %loaded = load <2 x i64>, <2 x i64> addrspace(1)* %elt.ptr
  %reversed = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %loaded) #1
  store <2 x i64> %reversed, <2 x i64> addrspace(1)* %out
  ret void
}
115
; Non-kernel function: truncates the i32 argument to i16, bit-reverses it,
; reinterprets the 16 bits as a half and extends that to the float return
; value. The check (fiji run only) expects a single SDWA v_bfrev_b32 that
; reads the low word of v0.
; FUNC-LABEL: {{^}}missing_truncate_promote_bitreverse:
; VI: v_bfrev_b32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
define float @missing_truncate_promote_bitreverse(i32 %arg) {
bb:
  %lo16 = trunc i32 %arg to i16
  %reversed = call i16 @llvm.bitreverse.i16(i16 %lo16)
  %as.half = bitcast i16 %reversed to half
  %widened = fpext half %as.half to float
  ret float %widened
}
126
; Attribute group #0 is attached to the amdgpu_kernel test functions.
attributes #0 = { nounwind }
; Attribute group #1 is attached to the intrinsic declarations and to most of
; the bitreverse call sites.
attributes #1 = { nounwind readnone }
129