; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Unsigned i8 division. Both operands are loaded as i8, so they fit in
; 24 bits. The checks below expect the float-based expansion: two
; int-to-float conversions, a float reciprocal, and a float-to-int
; conversion back.
; FUNC-LABEL: {{^}}udiv24_i8:
; SI: v_cvt_f32_ubyte
; SI: v_cvt_f32_ubyte
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
  ; Numerator is at %in[0], denominator at %in[1].
  %den_ptr = getelementptr i8 addrspace(1)* %in, i8 1
  %num = load i8 addrspace(1) * %in
  %den = load i8 addrspace(1) * %den_ptr
  %result = udiv i8 %num, %den
  store i8 %result, i8 addrspace(1)* %out
  ret void
}

; Unsigned i16 division. Both operands are loaded as i16, so they fit in
; 24 bits. The checks below expect the float-based expansion: two
; int-to-float conversions, a float reciprocal, and a float-to-int
; conversion back.
; FUNC-LABEL: {{^}}udiv24_i16:
; SI: v_cvt_f32_u32
; SI: v_cvt_f32_u32
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @udiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  ; Numerator is at %in[0], denominator at %in[1].
  %den_ptr = getelementptr i16 addrspace(1)* %in, i16 1
  %num = load i16 addrspace(1) * %in, align 2
  %den = load i16 addrspace(1) * %den_ptr, align 2
  %result = udiv i16 %num, %den
  store i16 %result, i16 addrspace(1)* %out, align 2
  ret void
}

; Unsigned i32 division where both operands have their top 8 bits
; cleared (shl 8 followed by lshr 8), leaving at most 24 significant
; bits. The checks below expect the float-based expansion: two
; int-to-float conversions, a float reciprocal, and a float-to-int
; conversion back.
; FUNC-LABEL: {{^}}udiv24_i32:
; SI: v_cvt_f32_u32
; SI-DAG: v_cvt_f32_u32
; SI-DAG: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is at %in[0], denominator at %in[1].
  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
  %num = load i32 addrspace(1) * %in, align 4
  %den = load i32 addrspace(1) * %den_ptr, align 4
  ; Zero the upper 8 bits of each operand.
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = lshr i32 %num.i24.0, 8
  %den.i24 = lshr i32 %den.i24.0, 8
  %result = udiv i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: both operands keep up to 25 significant bits (shl 7
; followed by lshr 7), one more than the 24-bit case. The checks below
; expect no float-reciprocal expansion here.
; FUNC-LABEL: {{^}}udiv25_i32:
; RCP_IFLAG is for URECIP in the full 32b alg
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @udiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is at %in[0], denominator at %in[1].
  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
  %num = load i32 addrspace(1) * %in, align 4
  %den = load i32 addrspace(1) * %den_ptr, align 4
  ; Zero only the upper 7 bits of each operand.
  %num.i24.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 7
  %num.i24 = lshr i32 %num.i24.0, 7
  %den.i24 = lshr i32 %den.i24.0, 7
  %result = udiv i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: the numerator is masked to 24 bits (shift by 8) but the
; denominator keeps up to 25 bits (shift by 7). The checks below expect
; no float-reciprocal expansion when only one operand fits in 24 bits.
; FUNC-LABEL: {{^}}test_no_udiv24_i32_1:
; RCP_IFLAG is for URECIP in the full 32b alg
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_udiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is at %in[0], denominator at %in[1].
  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
  %num = load i32 addrspace(1) * %in, align 4
  %den = load i32 addrspace(1) * %den_ptr, align 4
  ; Numerator: clear upper 8 bits. Denominator: clear upper 7 bits only.
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 7
  %num.i24 = lshr i32 %num.i24.0, 8
  %den.i24 = lshr i32 %den.i24.0, 7
  %result = udiv i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: the numerator keeps up to 25 bits (shift by 7) while the
; denominator is masked to 24 bits (shift by 8). The checks below expect
; no float-reciprocal expansion when only one operand fits in 24 bits.
; FUNC-LABEL: {{^}}test_no_udiv24_i32_2:
; RCP_IFLAG is for URECIP in the full 32b alg
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is at %in[0], denominator at %in[1].
  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
  %num = load i32 addrspace(1) * %in, align 4
  %den = load i32 addrspace(1) * %den_ptr, align 4
  ; Numerator: clear upper 7 bits only. Denominator: clear upper 8 bits.
  %num.i24.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = lshr i32 %num.i24.0, 7
  %den.i24 = lshr i32 %den.i24.0, 8
  %result = udiv i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Unsigned i8 remainder. Both operands are loaded as i8, so they fit in
; 24 bits. The checks below expect the float-based expansion: two
; int-to-float conversions, a float reciprocal, and a float-to-int
; conversion back.
; FUNC-LABEL: {{^}}urem24_i8:
; SI: v_cvt_f32_ubyte
; SI: v_cvt_f32_ubyte
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @urem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
  ; Numerator is at %in[0], denominator at %in[1].
  %den_ptr = getelementptr i8 addrspace(1)* %in, i8 1
  %num = load i8 addrspace(1) * %in
  %den = load i8 addrspace(1) * %den_ptr
  %result = urem i8 %num, %den
  store i8 %result, i8 addrspace(1)* %out
  ret void
}

; Unsigned i16 remainder. Both operands are loaded as i16, so they fit in
; 24 bits. The checks below expect the float-based expansion: two
; int-to-float conversions, a float reciprocal, and a float-to-int
; conversion back.
; FUNC-LABEL: {{^}}urem24_i16:
; SI: v_cvt_f32_u32
; SI: v_cvt_f32_u32
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @urem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  ; Numerator is at %in[0], denominator at %in[1].
  %den_ptr = getelementptr i16 addrspace(1)* %in, i16 1
  %num = load i16 addrspace(1) * %in, align 2
  %den = load i16 addrspace(1) * %den_ptr, align 2
  %result = urem i16 %num, %den
  store i16 %result, i16 addrspace(1)* %out, align 2
  ret void
}

; Unsigned i32 remainder where both operands have their top 8 bits
; cleared (shl 8 followed by lshr 8), leaving at most 24 significant
; bits. The checks below expect the float-based expansion: two
; int-to-float conversions, a float reciprocal, and a float-to-int
; conversion back.
; FUNC-LABEL: {{^}}urem24_i32:
; SI: v_cvt_f32_u32
; SI: v_cvt_f32_u32
; SI: v_rcp_f32
; SI: v_cvt_u32_f32

; EG: UINT_TO_FLT
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @urem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is at %in[0], denominator at %in[1].
  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
  %num = load i32 addrspace(1) * %in, align 4
  %den = load i32 addrspace(1) * %den_ptr, align 4
  ; Zero the upper 8 bits of each operand.
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = lshr i32 %num.i24.0, 8
  %den.i24 = lshr i32 %den.i24.0, 8
  %result = urem i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: both operands keep up to 25 significant bits (shl 7
; followed by lshr 7), one more than the 24-bit case. The checks below
; expect no float-reciprocal expansion here.
; FUNC-LABEL: {{^}}urem25_i32:
; RCP_IFLAG is for URECIP in the full 32b alg
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @urem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is at %in[0], denominator at %in[1].
  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
  %num = load i32 addrspace(1) * %in, align 4
  %den = load i32 addrspace(1) * %den_ptr, align 4
  ; Zero only the upper 7 bits of each operand.
  %num.i24.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 7
  %num.i24 = lshr i32 %num.i24.0, 7
  %den.i24 = lshr i32 %den.i24.0, 7
  %result = urem i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: the numerator is masked to 24 bits (shift by 8) but the
; denominator keeps up to 25 bits (shift by 7). The checks below expect
; no float-reciprocal expansion when only one operand fits in 24 bits.
; FUNC-LABEL: {{^}}test_no_urem24_i32_1:
; RCP_IFLAG is for URECIP in the full 32b alg
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_urem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is at %in[0], denominator at %in[1].
  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
  %num = load i32 addrspace(1) * %in, align 4
  %den = load i32 addrspace(1) * %den_ptr, align 4
  ; Numerator: clear upper 8 bits. Denominator: clear upper 7 bits only.
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 7
  %num.i24 = lshr i32 %num.i24.0, 8
  %den.i24 = lshr i32 %den.i24.0, 7
  %result = urem i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: the numerator keeps up to 25 bits (shift by 7) while the
; denominator is masked to 24 bits (shift by 8). The checks below expect
; no float-reciprocal expansion when only one operand fits in 24 bits.
; FUNC-LABEL: {{^}}test_no_urem24_i32_2:
; RCP_IFLAG is for URECIP in the full 32b alg
; SI: v_rcp_iflag
; SI-NOT: v_rcp_f32

; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_urem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  ; Numerator is at %in[0], denominator at %in[1].
  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
  %num = load i32 addrspace(1) * %in, align 4
  %den = load i32 addrspace(1) * %den_ptr, align 4
  ; Numerator: clear upper 7 bits only. Denominator: clear upper 8 bits.
  %num.i24.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = lshr i32 %num.i24.0, 7
  %den.i24 = lshr i32 %den.i24.0, 8
  %result = urem i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}
