1; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
2
3; GCN-LABEL: {{^}}lsh8_or_and:
4; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x6050400
5; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
6define amdgpu_kernel void @lsh8_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
7bb:
8  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
9  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
10  %tmp = load i32, i32 addrspace(1)* %gep, align 4
11  %tmp2 = shl i32 %tmp, 8
12  %tmp3 = and i32 %arg1, 255
13  %tmp4 = or i32 %tmp2, %tmp3
14  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
15  ret void
16}
17
18; GCN-LABEL: {{^}}lsr24_or_and:
19; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503
20; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
21define amdgpu_kernel void @lsr24_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
22bb:
23  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
24  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
25  %tmp = load i32, i32 addrspace(1)* %gep, align 4
26  %tmp2 = lshr i32 %tmp, 24
27  %tmp3 = and i32 %arg1, 4294967040 ; 0xffffff00
28  %tmp4 = or i32 %tmp2, %tmp3
29  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
30  ret void
31}
32
33; GCN-LABEL: {{^}}and_or_lsr24:
34; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503
35; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
36define amdgpu_kernel void @and_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
37bb:
38  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
39  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
40  %tmp = load i32, i32 addrspace(1)* %gep, align 4
41  %tmp2 = and i32 %tmp, 4294967040 ; 0xffffff00
42  %tmp3 = lshr i32 %arg1, 24
43  %tmp4 = or i32 %tmp2, %tmp3
44  %tmp5 = xor i32 %tmp4, -2147483648
45  store i32 %tmp5, i32 addrspace(1)* %gep, align 4
46  ret void
47}
48
49; GCN-LABEL: {{^}}and_or_and:
50; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020500
51; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
52define amdgpu_kernel void @and_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
53bb:
54  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
55  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
56  %tmp = load i32, i32 addrspace(1)* %gep, align 4
57  %tmp2 = and i32 %tmp, -16711936
58  %tmp3 = and i32 %arg1, 16711935
59  %tmp4 = or i32 %tmp2, %tmp3
60  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
61  ret void
62}
63
64; GCN-LABEL: {{^}}lsh8_or_lsr24:
65; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x6050403
66; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
67define amdgpu_kernel void @lsh8_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
68bb:
69  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
70  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
71  %tmp = load i32, i32 addrspace(1)* %gep, align 4
72  %tmp2 = shl i32 %tmp, 8
73  %tmp3 = lshr i32 %arg1, 24
74  %tmp4 = or i32 %tmp2, %tmp3
75  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
76  ret void
77}
78
79; GCN-LABEL: {{^}}lsh16_or_lsr24:
80; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x5040c03
81; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
82define amdgpu_kernel void @lsh16_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
83bb:
84  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
85  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
86  %tmp = load i32, i32 addrspace(1)* %gep, align 4
87  %tmp2 = shl i32 %tmp, 16
88  %tmp3 = lshr i32 %arg1, 24
89  %tmp4 = or i32 %tmp2, %tmp3
90  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
91  ret void
92}
93
94; GCN-LABEL: {{^}}and_xor_and:
95; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104
96; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
97define amdgpu_kernel void @and_xor_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
98bb:
99  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
100  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
101  %tmp = load i32, i32 addrspace(1)* %gep, align 4
102  %tmp2 = and i32 %tmp, -16776961
103  %tmp3 = and i32 %arg1, 16776960
104  %tmp4 = xor i32 %tmp2, %tmp3
105  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
106  ret void
107}
108
109; GCN-LABEL: {{^}}and_or_or_and:
110; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
111; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
112define amdgpu_kernel void @and_or_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
113bb:
114  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
115  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
116  %tmp = load i32, i32 addrspace(1)* %gep, align 4
117  %and = and i32 %tmp, 16711935     ; 0x00ff00ff
118  %tmp1 = and i32 %arg1, 4294967040 ; 0xffffff00
119  %tmp2 = or i32 %tmp1, -65536
120  %tmp3 = or i32 %tmp2, %and
121  store i32 %tmp3, i32 addrspace(1)* %gep, align 4
122  ret void
123}
124
125; GCN-LABEL: {{^}}and_or_and_shl:
126; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00
127; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
128define amdgpu_kernel void @and_or_and_shl(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
129bb:
130  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
131  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
132  %tmp = load i32, i32 addrspace(1)* %gep, align 4
133  %tmp2 = shl i32 %tmp, 16
134  %tmp3 = and i32 %arg1, 65535
135  %tmp4 = or i32 %tmp2, %tmp3
136  %and = and i32 %tmp4, 4278190335
137  store i32 %and, i32 addrspace(1)* %gep, align 4
138  ret void
139}
140
141; GCN-LABEL: {{^}}or_and_or:
142; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104
143; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
144define amdgpu_kernel void @or_and_or(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
145bb:
146  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
147  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
148  %tmp = load i32, i32 addrspace(1)* %gep, align 4
149  %or1 = or i32 %tmp, 16776960    ; 0x00ffff00
150  %or2 = or i32 %arg1, 4278190335 ; 0xff0000ff
151  %and = and i32 %or1, %or2
152  store i32 %and, i32 addrspace(1)* %gep, align 4
153  ret void
154}
155
156; GCN-LABEL: {{^}}known_ffff0500:
157; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
158; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
159; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
160; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
161define amdgpu_kernel void @known_ffff0500(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
162bb:
163  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
164  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
165  %load = load i32, i32 addrspace(1)* %gep, align 4
166  %mask1 = or i32 %arg1, 32768 ; 0x8000
167  %mask2 = or i32 %load, 4
168  %and = and i32 %mask2, 16711935     ; 0x00ff00ff
169  %tmp1 = and i32 %mask1, 4294967040 ; 0xffffff00
170  %tmp2 = or i32 %tmp1, 4294901760   ; 0xffff0000
171  %tmp3 = or i32 %tmp2, %and
172  store i32 %tmp3, i32 addrspace(1)* %gep, align 4
173  %v = and i32 %tmp3, 4294934532 ; 0xffff8004
174  store i32 %v, i32 addrspace(1)* %arg, align 4
175  ret void
176}
177
178; GCN-LABEL: {{^}}known_050c0c00:
179; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00
180; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 4{{$}}
181; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
182; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
183define amdgpu_kernel void @known_050c0c00(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
184bb:
185  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
186  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
187  %tmp = load i32, i32 addrspace(1)* %gep, align 4
188  %tmp2 = shl i32 %tmp, 16
189  %mask = or i32 %arg1, 4
190  %tmp3 = and i32 %mask, 65535
191  %tmp4 = or i32 %tmp2, %tmp3
192  %and = and i32 %tmp4, 4278190335
193  store i32 %and, i32 addrspace(1)* %gep, align 4
194  %v = and i32 %and, 16776964
195  store i32 %v, i32 addrspace(1)* %arg, align 4
196  ret void
197}
198
199; GCN-LABEL: {{^}}known_ffff8004:
200; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
201; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
202; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
203; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
204define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
205bb:
206  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
207  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
208  %load = load i32, i32 addrspace(1)* %gep, align 4
209  %mask1 = or i32 %arg1, 4
210  %mask2 = or i32 %load, 32768 ; 0x8000
211  %and = and i32 %mask1, 16711935     ; 0x00ff00ff
212  %tmp1 = and i32 %mask2, 4294967040 ; 0xffffff00
213  %tmp2 = or i32 %tmp1, 4294901760   ; 0xffff0000
214  %tmp3 = or i32 %tmp2, %and
215  store i32 %tmp3, i32 addrspace(1)* %gep, align 4
216  %v = and i32 %tmp3, 4294934532 ; 0xffff8004
217  store i32 %v, i32 addrspace(1)* %arg, align 4
218  ret void
219}
220
221declare i32 @llvm.amdgcn.workitem.id.x()
222