1; RUN: llc -march=amdgcn -mcpu=SI < %s
2; RUN: llc -march=amdgcn -mcpu=tonga < %s
3; RUN: llc -march=r600 -mcpu=redwood < %s
4
; srem_i32: scalar signed remainder with a variable divisor.
;   %in  - address-space-1 pointer to two consecutive i32 values:
;          element 0 is the dividend, element 1 is the divisor.
;   %out - address-space-1 pointer that receives the i32 remainder.
define void @srem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; The divisor is stored one i32 element past the dividend.
  %divisor_addr = getelementptr i32 addrspace(1)* %in, i32 1
  %dividend = load i32 addrspace(1)* %in
  %divisor = load i32 addrspace(1)* %divisor_addr
  ; Signed remainder of two values that are only known at run time.
  %rem = srem i32 %dividend, %divisor
  store i32 %rem, i32 addrspace(1)* %out
  ret void
}
13
; srem_i32_4: scalar signed remainder by the constant 4.
;   %in  - address-space-1 pointer to the i32 dividend.
;   %out - address-space-1 pointer that receives the i32 remainder.
define void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %dividend = load i32 addrspace(1)* %in
  ; The divisor is an immediate power of two rather than a loaded value.
  %rem = srem i32 %dividend, 4
  store i32 %rem, i32 addrspace(1)* %out
  ret void
}
20
; srem_v2i32: element-wise signed remainder on <2 x i32> with a variable
; divisor.
;   %in  - address-space-1 pointer to two consecutive <2 x i32> vectors:
;          vector 0 holds the dividends, vector 1 holds the divisors.
;   %out - address-space-1 pointer that receives the <2 x i32> remainders.
define void @srem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  ; An index of 1 advances by one whole <2 x i32> vector, not one i32 lane.
  %divisor_addr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
  %dividend = load <2 x i32> addrspace(1)* %in
  %divisor = load <2 x i32> addrspace(1)* %divisor_addr
  %rem = srem <2 x i32> %dividend, %divisor
  store <2 x i32> %rem, <2 x i32> addrspace(1)* %out
  ret void
}
29
; srem_v2i32_4: element-wise signed remainder of a <2 x i32> by the constant
; vector <4, 4>.
;   %in  - address-space-1 pointer to the <2 x i32> dividends.
;   %out - address-space-1 pointer that receives the <2 x i32> remainders.
define void @srem_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  %dividend = load <2 x i32> addrspace(1)* %in
  ; Every lane is divided by the same immediate, 4.
  %rem = srem <2 x i32> %dividend, <i32 4, i32 4>
  store <2 x i32> %rem, <2 x i32> addrspace(1)* %out
  ret void
}
36
; srem_v4i32: element-wise signed remainder on <4 x i32> with a variable
; divisor.
;   %in  - address-space-1 pointer to two consecutive <4 x i32> vectors:
;          vector 0 holds the dividends, vector 1 holds the divisors.
;   %out - address-space-1 pointer that receives the <4 x i32> remainders.
define void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  ; An index of 1 advances by one whole <4 x i32> vector, not one i32 lane.
  %divisor_addr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
  %dividend = load <4 x i32> addrspace(1)* %in
  %divisor = load <4 x i32> addrspace(1)* %divisor_addr
  %rem = srem <4 x i32> %dividend, %divisor
  store <4 x i32> %rem, <4 x i32> addrspace(1)* %out
  ret void
}
45
; srem_v4i32_4: element-wise signed remainder of a <4 x i32> by the constant
; vector <4, 4, 4, 4>.
;   %in  - address-space-1 pointer to the <4 x i32> dividends.
;   %out - address-space-1 pointer that receives the <4 x i32> remainders.
define void @srem_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  %dividend = load <4 x i32> addrspace(1)* %in
  ; Every lane is divided by the same immediate, 4.
  %rem = srem <4 x i32> %dividend, <i32 4, i32 4, i32 4, i32 4>
  store <4 x i32> %rem, <4 x i32> addrspace(1)* %out
  ret void
}
52