1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,CGP %s
4
5; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
6
; Scalar i32 udiv where numerator and denominator are both plain function
; arguments. The two RUN lines are checked under separate prefixes because
; their expected output differs, e.g. v_rcp_iflag_f32 versus v_rcp_f32 and the
; additional v_mul_lo_u32 by 0 instructions in the CGP output.
7define i32 @v_udiv_i32(i32 %num, i32 %den) {
8; GISEL-LABEL: v_udiv_i32:
9; GISEL:       ; %bb.0:
10; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
12; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
13; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
14; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
15; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
16; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
17; GISEL-NEXT:    v_mul_hi_u32 v3, v2, v3
18; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
19; GISEL-NEXT:    v_mul_hi_u32 v2, v0, v2
20; GISEL-NEXT:    v_mul_lo_u32 v3, v2, v1
21; GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
22; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
23; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
24; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
25; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
26; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
27; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
28; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
29; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
30; GISEL-NEXT:    s_setpc_b64 s[30:31]
31;
32; CGP-LABEL: v_udiv_i32:
33; CGP:       ; %bb.0:
34; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
36; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
37; CGP-NEXT:    v_rcp_f32_e32 v2, v2
38; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
39; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
40; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
41; CGP-NEXT:    v_mul_lo_u32 v4, 0, v3
42; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
43; CGP-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
44; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
45; CGP-NEXT:    v_mul_lo_u32 v3, 0, v2
46; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
47; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
48; CGP-NEXT:    v_mul_lo_u32 v3, v2, v1
49; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
50; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
51; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
52; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
53; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
54; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
55; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
56; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
57; CGP-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
58; CGP-NEXT:    s_setpc_b64 s[30:31]
59  %result = udiv i32 %num, %den
60  ret i32 %result
61}
62
63; FIXME: The readfirstlane call is a workaround for the uniform VGPR case not being handled.
64declare i32 @llvm.amdgcn.readfirstlane(i32)
65
; amdgpu_ps variant of the scalar i32 udiv with both operands marked inreg
; (they appear as s0 and s1 in the expected output). The quotient is passed
; through llvm.amdgcn.readfirstlane before being returned, which shows up as
; the trailing v_readfirstlane_b32 s0, v0 in both sets of checks.
66define amdgpu_ps i32 @s_udiv_i32(i32 inreg %num, i32 inreg %den) {
67; GISEL-LABEL: s_udiv_i32:
68; GISEL:       ; %bb.0:
69; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, s1
70; GISEL-NEXT:    s_sub_i32 s2, 0, s1
71; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
72; GISEL-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
73; GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
74; GISEL-NEXT:    v_mul_lo_u32 v1, s2, v0
75; GISEL-NEXT:    v_mul_hi_u32 v1, v0, v1
76; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
77; GISEL-NEXT:    v_mul_hi_u32 v0, s0, v0
78; GISEL-NEXT:    v_mul_lo_u32 v1, v0, s1
79; GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
80; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, s0, v1
81; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v1
82; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
83; GISEL-NEXT:    v_subrev_i32_e64 v2, s[2:3], s1, v1
84; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
85; GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
86; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v1
87; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
88; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
89; GISEL-NEXT:    ; return to shader part epilog
90;
91; CGP-LABEL: s_udiv_i32:
92; CGP:       ; %bb.0:
93; CGP-NEXT:    v_cvt_f32_u32_e32 v0, s1
94; CGP-NEXT:    s_sub_i32 s2, 0, s1
95; CGP-NEXT:    v_rcp_f32_e32 v0, v0
96; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
97; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
98; CGP-NEXT:    v_mul_lo_u32 v1, s2, v0
99; CGP-NEXT:    v_mul_lo_u32 v2, 0, v1
100; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
101; CGP-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
102; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
103; CGP-NEXT:    v_mul_lo_u32 v1, 0, v0
104; CGP-NEXT:    v_mul_hi_u32 v0, s0, v0
105; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
106; CGP-NEXT:    v_mul_lo_u32 v1, v0, s1
107; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
108; CGP-NEXT:    v_sub_i32_e32 v1, vcc, s0, v1
109; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v1
110; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
111; CGP-NEXT:    v_subrev_i32_e64 v2, s[2:3], s1, v1
112; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
113; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
114; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v1
115; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
116; CGP-NEXT:    v_readfirstlane_b32 s0, v0
117; CGP-NEXT:    ; return to shader part epilog
118  %result = udiv i32 %num, %den
119  %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result)
120  ret i32 %readlane
121}
122
; <2 x i32> udiv with both operands as plain function arguments. The expected
; output contains the scalar expansion sequence once per vector element, with
; the instructions of the two elements interleaved.
123define <2 x i32> @v_udiv_v2i32(<2 x i32> %num, <2 x i32> %den) {
124; GISEL-LABEL: v_udiv_v2i32:
125; GISEL:       ; %bb.0:
126; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
128; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
129; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
130; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
131; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
132; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
133; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
134; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
135; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
136; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
137; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
138; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
139; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
140; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
141; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
142; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
143; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
144; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
145; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
146; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
147; GISEL-NEXT:    v_mul_lo_u32 v8, v5, v3
148; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
149; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
150; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
151; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
152; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
153; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
154; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
155; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
156; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
157; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
158; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
159; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
160; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
161; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
162; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
163; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
164; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
165; GISEL-NEXT:    s_setpc_b64 s[30:31]
166;
167; CGP-LABEL: v_udiv_v2i32:
168; CGP:       ; %bb.0:
169; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
171; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
172; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
173; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
174; CGP-NEXT:    v_rcp_f32_e32 v4, v4
175; CGP-NEXT:    v_rcp_f32_e32 v6, v6
176; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
177; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
178; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
179; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
180; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
181; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
182; CGP-NEXT:    v_mul_lo_u32 v8, 0, v5
183; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
184; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
185; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
186; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
187; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
188; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
189; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
190; CGP-NEXT:    v_mul_lo_u32 v6, 0, v4
191; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
192; CGP-NEXT:    v_mul_lo_u32 v7, 0, v5
193; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
194; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
195; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
196; CGP-NEXT:    v_mul_lo_u32 v6, v4, v2
197; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
198; CGP-NEXT:    v_mul_lo_u32 v8, v5, v3
199; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
200; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
201; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
202; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
203; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
204; CGP-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
205; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
206; CGP-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
207; CGP-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
208; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
209; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
210; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
211; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
212; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
213; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
214; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
215; CGP-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
216; CGP-NEXT:    s_setpc_b64 s[30:31]
217  %result = udiv <2 x i32> %num, %den
218  ret <2 x i32> %result
219}
220
; Scalar i32 udiv by the constant 4096 (0x1000). The expected output is the
; same for both RUN lines, so it is checked under their common prefix. It
; still contains the reciprocal-based expansion; the constant denominator is
; visible in the s_movk_i32 of 0x1000 and in the v_lshlrev_b32 by 12 that
; forms the quotient-times-denominator product.
221define i32 @v_udiv_i32_pow2k_denom(i32 %num) {
222; CHECK-LABEL: v_udiv_i32_pow2k_denom:
223; CHECK:       ; %bb.0:
224; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
225; CHECK-NEXT:    s_movk_i32 s6, 0x1000
226; CHECK-NEXT:    v_mov_b32_e32 v1, 0xfffff000
227; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s6
228; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
229; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
230; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
231; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v2
232; CHECK-NEXT:    v_mul_hi_u32 v1, v2, v1
233; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
234; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
235; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 12, v1
236; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
237; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
238; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
239; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
240; CHECK-NEXT:    v_subrev_i32_e64 v2, s[4:5], s6, v0
241; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
242; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v1
243; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
244; CHECK-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
245; CHECK-NEXT:    s_setpc_b64 s[30:31]
246  %result = udiv i32 %num, 4096
247  ret i32 %result
248}
249
; <2 x i32> udiv by a splat of the constant 4096 (0x1000). The expected output
; differs per RUN line. In the GISEL output the constant is materialized once
; in s8 and a single v_rcp_iflag_f32 result feeds both elements, while the CGP
; output also copies the constant into v2 and converts and inverts it
; separately for each element.
250define <2 x i32> @v_udiv_v2i32_pow2k_denom(<2 x i32> %num) {
251; GISEL-LABEL: v_udiv_v2i32_pow2k_denom:
252; GISEL:       ; %bb.0:
253; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254; GISEL-NEXT:    s_movk_i32 s8, 0x1000
255; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, s8
256; GISEL-NEXT:    s_sub_i32 s4, 0, s8
257; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
258; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v2
259; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
260; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
261; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
262; GISEL-NEXT:    v_mul_lo_u32 v4, s4, v3
263; GISEL-NEXT:    v_mul_lo_u32 v5, s4, v2
264; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
265; GISEL-NEXT:    v_mul_hi_u32 v5, v2, v5
266; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
267; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
268; GISEL-NEXT:    v_mul_hi_u32 v3, v0, v3
269; GISEL-NEXT:    v_mul_hi_u32 v2, v1, v2
270; GISEL-NEXT:    v_lshlrev_b32_e32 v4, 12, v3
271; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
272; GISEL-NEXT:    v_lshlrev_b32_e32 v6, 12, v2
273; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v2
274; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
275; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
276; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
277; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
278; GISEL-NEXT:    v_subrev_i32_e64 v4, s[4:5], s8, v0
279; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s8, v1
280; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v7, s[4:5]
281; GISEL-NEXT:    v_subrev_i32_e64 v5, s[6:7], s8, v1
282; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
283; GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v3
284; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[4:5]
285; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
286; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
287; GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
288; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v1
289; GISEL-NEXT:    v_cndmask_b32_e32 v1, v2, v5, vcc
290; GISEL-NEXT:    s_setpc_b64 s[30:31]
291;
292; CGP-LABEL: v_udiv_v2i32_pow2k_denom:
293; CGP:       ; %bb.0:
294; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
295; CGP-NEXT:    s_movk_i32 s4, 0x1000
296; CGP-NEXT:    v_mov_b32_e32 v2, 0x1000
297; CGP-NEXT:    s_movk_i32 s5, 0xf000
298; CGP-NEXT:    v_mov_b32_e32 v3, 0xfffff000
299; CGP-NEXT:    v_cvt_f32_u32_e32 v4, s4
300; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v2
301; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
302; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v5
303; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
304; CGP-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
305; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
306; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
307; CGP-NEXT:    v_mul_lo_u32 v6, s5, v4
308; CGP-NEXT:    v_mul_lo_u32 v3, v3, v5
309; CGP-NEXT:    v_mul_hi_u32 v6, v4, v6
310; CGP-NEXT:    v_mul_hi_u32 v3, v5, v3
311; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
312; CGP-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
313; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
314; CGP-NEXT:    v_mul_hi_u32 v3, v1, v3
315; CGP-NEXT:    v_lshlrev_b32_e32 v5, 12, v4
316; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
317; CGP-NEXT:    v_lshlrev_b32_e32 v7, 12, v3
318; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v3
319; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
320; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
321; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
322; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
323; CGP-NEXT:    v_subrev_i32_e64 v5, s[4:5], s4, v0
324; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v2
325; CGP-NEXT:    v_cndmask_b32_e64 v3, v3, v8, s[4:5]
326; CGP-NEXT:    v_sub_i32_e64 v6, s[6:7], v1, v2
327; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
328; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v4
329; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s[4:5]
330; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v3
331; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
332; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
333; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
334; CGP-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
335; CGP-NEXT:    s_setpc_b64 s[30:31]
336  %result = udiv <2 x i32> %num, <i32 4096, i32 4096>
337  ret <2 x i32> %result
338}
339
; Scalar i32 udiv by the odd constant 1235195 (0x12d8fb). The expected output
; is the same for both RUN lines, so it is checked under their common prefix.
; The constant is held in s6 and the quotient-times-denominator product is a
; v_mul_lo_u32 with s6.
340define i32 @v_udiv_i32_oddk_denom(i32 %num) {
341; CHECK-LABEL: v_udiv_i32_oddk_denom:
342; CHECK:       ; %bb.0:
343; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
344; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
345; CHECK-NEXT:    v_mov_b32_e32 v1, 0xffed2705
346; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s6
347; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
348; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
349; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
350; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v2
351; CHECK-NEXT:    v_mul_hi_u32 v1, v2, v1
352; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
353; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
354; CHECK-NEXT:    v_mul_lo_u32 v2, v1, s6
355; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
356; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
357; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
358; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
359; CHECK-NEXT:    v_subrev_i32_e64 v2, s[4:5], s6, v0
360; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
361; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v1
362; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
363; CHECK-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
364; CHECK-NEXT:    s_setpc_b64 s[30:31]
365  %result = udiv i32 %num, 1235195
366  ret i32 %result
367}
368
; <2 x i32> udiv by a splat of the odd constant 1235195 (0x12d8fb). The
; expected output differs per RUN line. In the GISEL output the constant lives
; only in s8 and one v_rcp_iflag_f32 result feeds both elements, while the CGP
; output additionally keeps the constant in v2 and computes the reciprocal
; once per element.
369define <2 x i32> @v_udiv_v2i32_oddk_denom(<2 x i32> %num) {
370; GISEL-LABEL: v_udiv_v2i32_oddk_denom:
371; GISEL:       ; %bb.0:
372; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
373; GISEL-NEXT:    s_mov_b32 s8, 0x12d8fb
374; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, s8
375; GISEL-NEXT:    s_sub_i32 s4, 0, s8
376; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
377; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v2
378; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
379; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
380; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
381; GISEL-NEXT:    v_mul_lo_u32 v4, s4, v3
382; GISEL-NEXT:    v_mul_lo_u32 v5, s4, v2
383; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
384; GISEL-NEXT:    v_mul_hi_u32 v5, v2, v5
385; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
386; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
387; GISEL-NEXT:    v_mul_hi_u32 v3, v0, v3
388; GISEL-NEXT:    v_mul_hi_u32 v2, v1, v2
389; GISEL-NEXT:    v_mul_lo_u32 v4, v3, s8
390; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
391; GISEL-NEXT:    v_mul_lo_u32 v6, v2, s8
392; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v2
393; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
394; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
395; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
396; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
397; GISEL-NEXT:    v_subrev_i32_e64 v4, s[4:5], s8, v0
398; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s8, v1
399; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v7, s[4:5]
400; GISEL-NEXT:    v_subrev_i32_e64 v5, s[6:7], s8, v1
401; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
402; GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v3
403; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[4:5]
404; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
405; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
406; GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
407; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v1
408; GISEL-NEXT:    v_cndmask_b32_e32 v1, v2, v5, vcc
409; GISEL-NEXT:    s_setpc_b64 s[30:31]
410;
411; CGP-LABEL: v_udiv_v2i32_oddk_denom:
412; CGP:       ; %bb.0:
413; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
414; CGP-NEXT:    s_mov_b32 s4, 0x12d8fb
415; CGP-NEXT:    v_mov_b32_e32 v2, 0x12d8fb
416; CGP-NEXT:    s_mov_b32 s5, 0xffed2705
417; CGP-NEXT:    v_cvt_f32_u32_e32 v3, s4
418; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
419; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
420; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
421; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
422; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
423; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
424; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
425; CGP-NEXT:    v_mul_lo_u32 v5, s5, v3
426; CGP-NEXT:    v_mul_lo_u32 v6, s5, v4
427; CGP-NEXT:    v_mul_hi_u32 v5, v3, v5
428; CGP-NEXT:    v_mul_hi_u32 v6, v4, v6
429; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
430; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
431; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
432; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
433; CGP-NEXT:    v_mul_lo_u32 v5, v3, s4
434; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v3
435; CGP-NEXT:    v_mul_lo_u32 v7, v4, v2
436; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
437; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
438; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
439; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
440; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
441; CGP-NEXT:    v_subrev_i32_e64 v5, s[4:5], s4, v0
442; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v2
443; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v8, s[4:5]
444; CGP-NEXT:    v_sub_i32_e64 v6, s[6:7], v1, v2
445; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
446; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
447; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s[4:5]
448; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
449; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
450; CGP-NEXT:    v_cndmask_b32_e32 v0, v3, v5, vcc
451; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
452; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v6, vcc
453; CGP-NEXT:    s_setpc_b64 s[30:31]
454  %result = udiv <2 x i32> %num, <i32 1235195, i32 1235195>
455  ret <2 x i32> %result
456}
457
; Scalar i32 udiv whose denominator is the constant 4096 shifted left by the
; variable %y, so the denominator is not a compile-time constant. The expected
; output, shared by both RUN lines, starts with the v_lshl_b32 that forms the
; denominator and then runs the reciprocal-based expansion on it.
458define i32 @v_udiv_i32_pow2_shl_denom(i32 %x, i32 %y) {
459; CHECK-LABEL: v_udiv_i32_pow2_shl_denom:
460; CHECK:       ; %bb.0:
461; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
462; CHECK-NEXT:    v_lshl_b32_e32 v1, 0x1000, v1
463; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v1
464; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
465; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
466; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
467; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
468; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v2
469; CHECK-NEXT:    v_mul_hi_u32 v3, v2, v3
470; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
471; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
472; CHECK-NEXT:    v_mul_lo_u32 v3, v2, v1
473; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
474; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
475; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
476; CHECK-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
477; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
478; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
479; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
480; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
481; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
482; CHECK-NEXT:    s_setpc_b64 s[30:31]
483  %shl.y = shl i32 4096, %y
484  %r = udiv i32 %x, %shl.y
485  ret i32 %r
486}
487
; <2 x i32> udiv whose denominator is a splat of 4096 shifted left element-wise
; by the variable %y. Both expected outputs begin with one v_lshl_b32 per
; element; after that they differ per RUN line (v_rcp_iflag_f32 versus
; v_rcp_f32, plus the additional v_mul_lo_u32 by 0 in the CGP output).
488define <2 x i32> @v_udiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
489; GISEL-LABEL: v_udiv_v2i32_pow2_shl_denom:
490; GISEL:       ; %bb.0:
491; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492; GISEL-NEXT:    s_movk_i32 s4, 0x1000
493; GISEL-NEXT:    v_lshl_b32_e32 v2, s4, v2
494; GISEL-NEXT:    v_lshl_b32_e32 v3, s4, v3
495; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
496; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
497; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
498; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
499; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
500; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
501; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
502; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
503; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
504; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
505; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
506; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
507; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
508; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
509; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
510; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
511; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
512; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
513; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
514; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
515; GISEL-NEXT:    v_mul_lo_u32 v8, v5, v3
516; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
517; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
518; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
519; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
520; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
521; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
522; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
523; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
524; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
525; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
526; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
527; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
528; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
529; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
530; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
531; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
532; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
533; GISEL-NEXT:    s_setpc_b64 s[30:31]
534;
535; CGP-LABEL: v_udiv_v2i32_pow2_shl_denom:
536; CGP:       ; %bb.0:
537; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
538; CGP-NEXT:    s_movk_i32 s4, 0x1000
539; CGP-NEXT:    v_lshl_b32_e32 v2, s4, v2
540; CGP-NEXT:    v_lshl_b32_e32 v3, s4, v3
541; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
542; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
543; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
544; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
545; CGP-NEXT:    v_rcp_f32_e32 v4, v4
546; CGP-NEXT:    v_rcp_f32_e32 v6, v6
547; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
548; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
549; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
550; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
551; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
552; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
553; CGP-NEXT:    v_mul_lo_u32 v8, 0, v5
554; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
555; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
556; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
557; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
558; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
559; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
560; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
561; CGP-NEXT:    v_mul_lo_u32 v6, 0, v4
562; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
563; CGP-NEXT:    v_mul_lo_u32 v7, 0, v5
564; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
565; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
566; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
567; CGP-NEXT:    v_mul_lo_u32 v6, v4, v2
568; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
569; CGP-NEXT:    v_mul_lo_u32 v8, v5, v3
570; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
571; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
572; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
573; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
574; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
575; CGP-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
576; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
577; CGP-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
578; CGP-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
579; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
580; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
581; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
582; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
583; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
584; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
585; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
586; CGP-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
587; CGP-NEXT:    s_setpc_b64 s[30:31]
588  %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
589  %r = udiv <2 x i32> %x, %shl.y
590  ret <2 x i32> %r
591}
592
; Scalar i32 udiv where both operands are first masked with 16777215
; (0xffffff), i.e. limited to their low 24 bits. The expected output applies
; the mask with v_and_b32 and then still contains the reciprocal-based
; expansion; it is checked separately per RUN line.
593define i32 @v_udiv_i32_24bit(i32 %num, i32 %den) {
594; GISEL-LABEL: v_udiv_i32_24bit:
595; GISEL:       ; %bb.0:
596; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
597; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
598; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
599; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
600; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
601; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
602; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
603; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
604; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
605; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
606; GISEL-NEXT:    v_mul_hi_u32 v3, v2, v3
607; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
608; GISEL-NEXT:    v_mul_hi_u32 v2, v0, v2
609; GISEL-NEXT:    v_mul_lo_u32 v3, v2, v1
610; GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
611; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
612; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
613; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
614; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
615; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
616; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
617; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
618; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
619; GISEL-NEXT:    s_setpc_b64 s[30:31]
620;
621; CGP-LABEL: v_udiv_i32_24bit:
622; CGP:       ; %bb.0:
623; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
624; CGP-NEXT:    s_mov_b32 s4, 0xffffff
625; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
626; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
627; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
628; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
629; CGP-NEXT:    v_rcp_f32_e32 v2, v2
630; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
631; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
632; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
633; CGP-NEXT:    v_mul_lo_u32 v4, 0, v3
634; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
635; CGP-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
636; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
637; CGP-NEXT:    v_mul_lo_u32 v3, 0, v2
638; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
639; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
640; CGP-NEXT:    v_mul_lo_u32 v3, v2, v1
641; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
642; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
643; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
644; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
645; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
646; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
647; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
648; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
649; CGP-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
650; CGP-NEXT:    s_setpc_b64 s[30:31]
651  %num.mask = and i32 %num, 16777215
652  %den.mask = and i32 %den, 16777215
653  %result = udiv i32 %num.mask, %den.mask
654  ret i32 %result
655}
656
; <2 x i32> udiv where every element of both operands is first masked with
; 16777215 (0xffffff), i.e. limited to its low 24 bits. The expected output
; applies four v_and_b32 masks and then contains the reciprocal-based expansion
; once per element; it is checked separately per RUN line.
657define <2 x i32> @v_udiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
658; GISEL-LABEL: v_udiv_v2i32_24bit:
659; GISEL:       ; %bb.0:
660; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
661; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
662; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
663; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
664; GISEL-NEXT:    v_and_b32_e32 v2, s4, v2
665; GISEL-NEXT:    v_and_b32_e32 v3, s4, v3
666; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
667; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
668; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
669; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
670; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
671; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
672; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
673; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
674; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
675; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
676; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
677; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
678; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
679; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
680; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
681; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
682; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
683; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
684; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
685; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
686; GISEL-NEXT:    v_mul_lo_u32 v8, v5, v3
687; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
688; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
689; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
690; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
691; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
692; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
693; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
694; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
695; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
696; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
697; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
698; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
699; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
700; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
701; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
702; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
703; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
704; GISEL-NEXT:    s_setpc_b64 s[30:31]
705;
706; CGP-LABEL: v_udiv_v2i32_24bit:
707; CGP:       ; %bb.0:
708; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
709; CGP-NEXT:    s_mov_b32 s4, 0xffffff
710; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
711; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
712; CGP-NEXT:    v_and_b32_e32 v2, s4, v2
713; CGP-NEXT:    v_and_b32_e32 v3, s4, v3
714; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
715; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
716; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
717; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
718; CGP-NEXT:    v_rcp_f32_e32 v4, v4
719; CGP-NEXT:    v_rcp_f32_e32 v6, v6
720; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
721; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
722; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
723; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
724; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
725; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
726; CGP-NEXT:    v_mul_lo_u32 v8, 0, v5
727; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
728; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
729; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
730; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
731; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
732; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
733; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
734; CGP-NEXT:    v_mul_lo_u32 v6, 0, v4
735; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
736; CGP-NEXT:    v_mul_lo_u32 v7, 0, v5
737; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
738; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
739; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
740; CGP-NEXT:    v_mul_lo_u32 v6, v4, v2
741; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
742; CGP-NEXT:    v_mul_lo_u32 v8, v5, v3
743; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
744; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
745; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
746; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
747; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
748; CGP-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
749; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
750; CGP-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
751; CGP-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
752; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
753; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
754; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
755; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
756; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
757; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
758; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
759; CGP-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
760; CGP-NEXT:    s_setpc_b64 s[30:31]
761  %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
762  %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
763  %result = udiv <2 x i32> %num.mask, %den.mask
764  ret <2 x i32> %result
765}
766