1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,CGP %s
4
5; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
6
; Signed division of two i32 function arguments.
; The GISEL prefix covers the run with -amdgpu-codegenprepare-disable-idiv-expansion=1,
; the CGP prefix the run with that option set to 0; the two expected instruction
; sequences below differ and are therefore checked independently.
7define i32 @v_sdiv_i32(i32 %num, i32 %den) {
8; GISEL-LABEL: v_sdiv_i32:
9; GISEL:       ; %bb.0:
10; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
12; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
13; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
14; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
15; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
16; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
17; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v1
18; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v1
19; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
20; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
21; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
22; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
23; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
24; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
25; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
26; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v1
27; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
28; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
29; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
30; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
31; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v0, v1
32; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
33; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v4
34; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
35; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
36; GISEL-NEXT:    v_xor_b32_e32 v1, v2, v3
37; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v1
38; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
39; GISEL-NEXT:    s_setpc_b64 s[30:31]
40;
41; CGP-LABEL: v_sdiv_i32:
42; CGP:       ; %bb.0:
43; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
45; CGP-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
46; CGP-NEXT:    v_xor_b32_e32 v4, v2, v3
47; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
48; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
49; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
50; CGP-NEXT:    v_xor_b32_e32 v1, v1, v3
51; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
52; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
53; CGP-NEXT:    v_mul_lo_u32 v5, v0, 0
54; CGP-NEXT:    v_rcp_f32_e32 v2, v2
55; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
56; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
57; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
58; CGP-NEXT:    v_mul_lo_u32 v6, v2, 0
59; CGP-NEXT:    v_mul_lo_u32 v7, 0, v3
60; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
61; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
62; CGP-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
63; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
64; CGP-NEXT:    v_mul_lo_u32 v3, 0, v2
65; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
66; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
67; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
68; CGP-NEXT:    v_mul_lo_u32 v3, v2, v1
69; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
70; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
71; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
72; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
73; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
74; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
75; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
76; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
77; CGP-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
78; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
79; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
80; CGP-NEXT:    s_setpc_b64 s[30:31]
81  %result = sdiv i32 %num, %den
82  ret i32 %result
83}
84
85; FIXME: This is a workaround for not handling the uniform VGPR case.
; The intrinsic declared here is called by @s_sdiv_i32 on its sdiv result
; before that result is returned.
86declare i32 @llvm.amdgcn.readfirstlane(i32)
87
; Signed i32 division inside an amdgpu_ps function whose two operands are marked
; inreg; the expected output reads them from s0 and s1.
; The quotient is passed through llvm.amdgcn.readfirstlane before it is returned,
; which shows up as the final v_readfirstlane_b32 in both expected sequences.
; GISEL and CGP prefixes are checked separately because the sequences differ.
88define amdgpu_ps i32 @s_sdiv_i32(i32 inreg %num, i32 inreg %den) {
89; GISEL-LABEL: s_sdiv_i32:
90; GISEL:       ; %bb.0:
91; GISEL-NEXT:    s_ashr_i32 s2, s0, 31
92; GISEL-NEXT:    s_ashr_i32 s3, s1, 31
93; GISEL-NEXT:    s_add_i32 s0, s0, s2
94; GISEL-NEXT:    s_add_i32 s1, s1, s3
95; GISEL-NEXT:    s_xor_b32 s0, s0, s2
96; GISEL-NEXT:    s_xor_b32 s4, s1, s3
97; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, s4
98; GISEL-NEXT:    s_sub_i32 s1, 0, s4
99; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
100; GISEL-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
101; GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
102; GISEL-NEXT:    v_mul_lo_u32 v1, s1, v0
103; GISEL-NEXT:    v_mul_hi_u32 v1, v0, v1
104; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
105; GISEL-NEXT:    v_mul_hi_u32 v0, s0, v0
106; GISEL-NEXT:    v_mul_lo_u32 v1, v0, s4
107; GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
108; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, s0, v1
109; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
110; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
111; GISEL-NEXT:    v_subrev_i32_e64 v2, s[0:1], s4, v1
112; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
113; GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
114; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
115; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
116; GISEL-NEXT:    s_xor_b32 s0, s2, s3
117; GISEL-NEXT:    v_xor_b32_e32 v0, s0, v0
118; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, s0, v0
119; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
120; GISEL-NEXT:    ; return to shader part epilog
121;
122; CGP-LABEL: s_sdiv_i32:
123; CGP:       ; %bb.0:
124; CGP-NEXT:    s_ashr_i32 s2, s0, 31
125; CGP-NEXT:    s_ashr_i32 s3, s1, 31
126; CGP-NEXT:    s_xor_b32 s4, s2, s3
127; CGP-NEXT:    s_add_i32 s0, s0, s2
128; CGP-NEXT:    s_add_i32 s1, s1, s3
129; CGP-NEXT:    s_xor_b32 s0, s0, s2
130; CGP-NEXT:    s_xor_b32 s5, s1, s3
131; CGP-NEXT:    v_cvt_f32_u32_e32 v0, s5
132; CGP-NEXT:    s_sub_i32 s1, 0, s5
133; CGP-NEXT:    s_bfe_u64 s[2:3], s[0:1], 0x200000
134; CGP-NEXT:    v_rcp_f32_e32 v0, v0
135; CGP-NEXT:    v_mul_lo_u32 v1, s2, 0
136; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
137; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
138; CGP-NEXT:    v_mul_lo_u32 v2, s1, v0
139; CGP-NEXT:    v_mul_lo_u32 v3, v0, 0
140; CGP-NEXT:    v_mul_lo_u32 v4, 0, v2
141; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
142; CGP-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
143; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
144; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
145; CGP-NEXT:    v_mul_lo_u32 v2, s3, v0
146; CGP-NEXT:    v_mul_hi_u32 v0, s2, v0
147; CGP-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
148; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
149; CGP-NEXT:    v_mul_lo_u32 v1, v0, s5
150; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
151; CGP-NEXT:    v_sub_i32_e32 v1, vcc, s0, v1
152; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
153; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
154; CGP-NEXT:    v_subrev_i32_e64 v2, s[0:1], s5, v1
155; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
156; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
157; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
158; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
159; CGP-NEXT:    v_xor_b32_e32 v0, s4, v0
160; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s4, v0
161; CGP-NEXT:    v_readfirstlane_b32 s0, v0
162; CGP-NEXT:    ; return to shader part epilog
163  %result = sdiv i32 %num, %den
164  %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result)
165  ret i32 %readlane
166}
167
; Element-wise signed division of two <2 x i32> function arguments.
; Both expected sequences contain two interleaved copies of the per-element
; division code (one per vector lane); GISEL and CGP prefixes are checked
; separately because the two RUN configurations produce different code.
168define <2 x i32> @v_sdiv_v2i32(<2 x i32> %num, <2 x i32> %den) {
169; GISEL-LABEL: v_sdiv_v2i32:
170; GISEL:       ; %bb.0:
171; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
173; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
174; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
175; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
176; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
177; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
178; GISEL-NEXT:    v_xor_b32_e32 v8, v4, v5
179; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
180; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
181; GISEL-NEXT:    v_xor_b32_e32 v9, v6, v7
182; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
183; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
184; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
185; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
186; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
187; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
188; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
189; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
190; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
191; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
192; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
193; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
194; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
195; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
196; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
197; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
198; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
199; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
200; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
201; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
202; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
203; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
204; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
205; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
206; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v3
207; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v5
208; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
209; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
210; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
211; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
212; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
213; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
214; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[4:5]
215; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
216; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
217; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
218; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
219; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
220; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
221; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
222; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
223; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
224; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v8
225; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v9
226; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
227; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
228; GISEL-NEXT:    s_setpc_b64 s[30:31]
229;
230; CGP-LABEL: v_sdiv_v2i32:
231; CGP:       ; %bb.0:
232; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
233; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
234; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
235; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
236; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
237; CGP-NEXT:    v_xor_b32_e32 v8, v4, v5
238; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
239; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
240; CGP-NEXT:    v_xor_b32_e32 v9, v6, v7
241; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
242; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
243; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
244; CGP-NEXT:    v_xor_b32_e32 v2, v2, v5
245; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
246; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
247; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
248; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
249; CGP-NEXT:    v_mul_lo_u32 v6, v0, 0
250; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
251; CGP-NEXT:    v_sub_i32_e32 v10, vcc, 0, v3
252; CGP-NEXT:    v_mul_lo_u32 v11, v1, 0
253; CGP-NEXT:    v_rcp_f32_e32 v4, v4
254; CGP-NEXT:    v_rcp_f32_e32 v7, v7
255; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
256; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
257; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
258; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
259; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
260; CGP-NEXT:    v_mul_lo_u32 v12, v4, 0
261; CGP-NEXT:    v_mul_lo_u32 v10, v10, v7
262; CGP-NEXT:    v_mul_lo_u32 v13, v7, 0
263; CGP-NEXT:    v_mul_lo_u32 v14, 0, v5
264; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
265; CGP-NEXT:    v_mul_lo_u32 v15, 0, v10
266; CGP-NEXT:    v_mul_hi_u32 v10, v7, v10
267; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
268; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
269; CGP-NEXT:    v_add_i32_e32 v5, vcc, v12, v5
270; CGP-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
271; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
272; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v10
273; CGP-NEXT:    v_mul_lo_u32 v7, 0, v4
274; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
275; CGP-NEXT:    v_mul_lo_u32 v10, 0, v5
276; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
277; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
278; CGP-NEXT:    v_add_i32_e32 v7, vcc, v10, v11
279; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
280; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
281; CGP-NEXT:    v_mul_lo_u32 v6, v4, v2
282; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
283; CGP-NEXT:    v_mul_lo_u32 v10, v5, v3
284; CGP-NEXT:    v_add_i32_e32 v11, vcc, 1, v5
285; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
286; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
287; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
288; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
289; CGP-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
290; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
291; CGP-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[4:5]
292; CGP-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
293; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
294; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
295; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
296; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
297; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
298; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
299; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
300; CGP-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
301; CGP-NEXT:    v_xor_b32_e32 v0, v0, v8
302; CGP-NEXT:    v_xor_b32_e32 v1, v1, v9
303; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
304; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
305; CGP-NEXT:    s_setpc_b64 s[30:31]
306  %result = sdiv <2 x i32> %num, %den
307  ret <2 x i32> %result
308}
309
; Signed i32 division by the power-of-two constant 4096 (0x1000).
; Both RUN configurations share the common CHECK prefix here, i.e. they are
; expected to emit identical code.
; The expected code still uses the reciprocal-based sequence (v_rcp_iflag_f32
; and following); 0xfffff000 is the negated denominator, and the product of the
; quotient estimate with 4096 appears as a left shift by 12.
310define i32 @v_sdiv_i32_pow2k_denom(i32 %num) {
311; CHECK-LABEL: v_sdiv_i32_pow2k_denom:
312; CHECK:       ; %bb.0:
313; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
314; CHECK-NEXT:    s_movk_i32 s6, 0x1000
315; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
316; CHECK-NEXT:    v_mov_b32_e32 v2, 0xfffff000
317; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
318; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s6
319; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
320; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
321; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
322; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
323; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v3
324; CHECK-NEXT:    v_mul_hi_u32 v2, v3, v2
325; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
326; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
327; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 12, v2
328; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
329; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
330; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
331; CHECK-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
332; CHECK-NEXT:    v_subrev_i32_e64 v3, s[4:5], s6, v0
333; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
334; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
335; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
336; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
337; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
338; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
339; CHECK-NEXT:    s_setpc_b64 s[30:31]
340  %result = sdiv i32 %num, 4096
341  ret i32 %result
342}
343
; Element-wise signed division of a <2 x i32> argument by the splat constant
; <4096, 4096>.
; Unlike the scalar pow2k test, the two RUN configurations produce different
; code here and are checked under separate prefixes: the GISEL sequence
; multiplies the quotient estimates by the denominator with v_mul_lo_u32,
; while the CGP sequence uses a left shift by 12 for that step.
344define <2 x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) {
345; GISEL-LABEL: v_sdiv_v2i32_pow2k_denom:
346; GISEL:       ; %bb.0:
347; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
348; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
349; GISEL-NEXT:    s_add_i32 s8, 0x1000, 0
350; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
351; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
352; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s8
353; GISEL-NEXT:    s_sub_i32 s4, 0, s8
354; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
355; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
356; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
357; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
358; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v4
359; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
360; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
361; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
362; GISEL-NEXT:    v_mul_lo_u32 v6, s4, v5
363; GISEL-NEXT:    v_mul_lo_u32 v7, s4, v4
364; GISEL-NEXT:    v_mul_hi_u32 v6, v5, v6
365; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v7
366; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
367; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
368; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
369; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
370; GISEL-NEXT:    v_mul_lo_u32 v6, v5, s8
371; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
372; GISEL-NEXT:    v_mul_lo_u32 v8, v4, s8
373; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
374; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
375; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
376; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
377; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
378; GISEL-NEXT:    v_subrev_i32_e64 v6, s[4:5], s8, v0
379; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s8, v1
380; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v9, s[4:5]
381; GISEL-NEXT:    v_subrev_i32_e64 v7, s[6:7], s8, v1
382; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
383; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v5
384; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
385; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
386; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
387; GISEL-NEXT:    v_cndmask_b32_e32 v0, v5, v6, vcc
388; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v1
389; GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v7, vcc
390; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
391; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
392; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
393; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
394; GISEL-NEXT:    s_setpc_b64 s[30:31]
395;
396; CGP-LABEL: v_sdiv_v2i32_pow2k_denom:
397; CGP:       ; %bb.0:
398; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
399; CGP-NEXT:    s_movk_i32 s4, 0x1000
400; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
401; CGP-NEXT:    v_mov_b32_e32 v3, 0x1000
402; CGP-NEXT:    s_mov_b32 s5, 0xfffff000
403; CGP-NEXT:    v_mov_b32_e32 v4, 0xfffff000
404; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
405; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
406; CGP-NEXT:    v_cvt_f32_u32_e32 v6, s4
407; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
408; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
409; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
410; CGP-NEXT:    v_rcp_iflag_f32_e32 v6, v6
411; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
412; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, v7
413; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
414; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
415; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
416; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
417; CGP-NEXT:    v_mul_lo_u32 v8, s5, v6
418; CGP-NEXT:    v_mul_lo_u32 v4, v4, v7
419; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
420; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
421; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
422; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
423; CGP-NEXT:    v_mul_hi_u32 v6, v0, v6
424; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
425; CGP-NEXT:    v_lshlrev_b32_e32 v7, 12, v6
426; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v6
427; CGP-NEXT:    v_lshlrev_b32_e32 v9, 12, v4
428; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v4
429; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
430; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
431; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
432; CGP-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
433; CGP-NEXT:    v_subrev_i32_e64 v7, s[4:5], s4, v0
434; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
435; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[4:5]
436; CGP-NEXT:    v_sub_i32_e64 v8, s[6:7], v1, v3
437; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
438; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v6
439; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[4:5]
440; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
441; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
442; CGP-NEXT:    v_cndmask_b32_e32 v0, v6, v7, vcc
443; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
444; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v8, vcc
445; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
446; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
447; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
448; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
449; CGP-NEXT:    s_setpc_b64 s[30:31]
450  %result = sdiv <2 x i32> %num, <i32 4096, i32 4096>
451  ret <2 x i32> %result
452}
453
; Signed i32 division by the odd constant 1235195 (0x12d8fb).
; Both RUN configurations share the common CHECK prefix, i.e. they are expected
; to emit identical code. 0xffed2705 in the expected output is the 32-bit
; two's-complement negation of the denominator.
454define i32 @v_sdiv_i32_oddk_denom(i32 %num) {
455; CHECK-LABEL: v_sdiv_i32_oddk_denom:
456; CHECK:       ; %bb.0:
457; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
458; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
459; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
460; CHECK-NEXT:    v_mov_b32_e32 v2, 0xffed2705
461; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
462; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s6
463; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
464; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
465; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
466; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
467; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v3
468; CHECK-NEXT:    v_mul_hi_u32 v2, v3, v2
469; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
470; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
471; CHECK-NEXT:    v_mul_lo_u32 v3, v2, s6
472; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
473; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
474; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
475; CHECK-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
476; CHECK-NEXT:    v_subrev_i32_e64 v3, s[4:5], s6, v0
477; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
478; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
479; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
480; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
481; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
482; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
483; CHECK-NEXT:    s_setpc_b64 s[30:31]
484  %result = sdiv i32 %num, 1235195
485  ret i32 %result
486}
487
; Element-wise signed division of a <2 x i32> argument by the splat constant
; <1235195, 1235195> (0x12d8fb per lane).
; The two RUN configurations produce different code and are checked under
; separate prefixes: the GISEL sequence keeps the denominator in one SGPR (s8)
; for both lanes, while the CGP sequence materializes it in both an SGPR (s4)
; and a VGPR (v3).
488define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) {
489; GISEL-LABEL: v_sdiv_v2i32_oddk_denom:
490; GISEL:       ; %bb.0:
491; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
493; GISEL-NEXT:    s_add_i32 s8, 0x12d8fb, 0
494; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
495; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
496; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s8
497; GISEL-NEXT:    s_sub_i32 s4, 0, s8
498; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
499; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
500; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
501; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
502; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v4
503; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
504; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
505; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
506; GISEL-NEXT:    v_mul_lo_u32 v6, s4, v5
507; GISEL-NEXT:    v_mul_lo_u32 v7, s4, v4
508; GISEL-NEXT:    v_mul_hi_u32 v6, v5, v6
509; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v7
510; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
511; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
512; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
513; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
514; GISEL-NEXT:    v_mul_lo_u32 v6, v5, s8
515; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
516; GISEL-NEXT:    v_mul_lo_u32 v8, v4, s8
517; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
518; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
519; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
520; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
521; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
522; GISEL-NEXT:    v_subrev_i32_e64 v6, s[4:5], s8, v0
523; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s8, v1
524; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v9, s[4:5]
525; GISEL-NEXT:    v_subrev_i32_e64 v7, s[6:7], s8, v1
526; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
527; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v5
528; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
529; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
530; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
531; GISEL-NEXT:    v_cndmask_b32_e32 v0, v5, v6, vcc
532; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v1
533; GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v7, vcc
534; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
535; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
536; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
537; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
538; GISEL-NEXT:    s_setpc_b64 s[30:31]
539;
540; CGP-LABEL: v_sdiv_v2i32_oddk_denom:
541; CGP:       ; %bb.0:
542; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
543; CGP-NEXT:    s_mov_b32 s4, 0x12d8fb
544; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
545; CGP-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
546; CGP-NEXT:    s_mov_b32 s5, 0xffed2705
547; CGP-NEXT:    v_mov_b32_e32 v4, 0xffed2705
548; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
549; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
550; CGP-NEXT:    v_cvt_f32_u32_e32 v6, s4
551; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
552; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
553; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
554; CGP-NEXT:    v_rcp_iflag_f32_e32 v6, v6
555; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
556; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, v7
557; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
558; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
559; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
560; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
561; CGP-NEXT:    v_mul_lo_u32 v8, s5, v6
562; CGP-NEXT:    v_mul_lo_u32 v4, v4, v7
563; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
564; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
565; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
566; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
567; CGP-NEXT:    v_mul_hi_u32 v6, v0, v6
568; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
569; CGP-NEXT:    v_mul_lo_u32 v7, v6, s4
570; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v6
571; CGP-NEXT:    v_mul_lo_u32 v9, v4, v3
572; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v4
573; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
574; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
575; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
576; CGP-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
577; CGP-NEXT:    v_subrev_i32_e64 v7, s[4:5], s4, v0
578; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
579; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[4:5]
580; CGP-NEXT:    v_sub_i32_e64 v8, s[6:7], v1, v3
581; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
582; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v6
583; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[4:5]
584; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
585; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
586; CGP-NEXT:    v_cndmask_b32_e32 v0, v6, v7, vcc
587; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
588; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v8, vcc
589; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
590; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
591; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
592; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
593; CGP-NEXT:    s_setpc_b64 s[30:31]
594  %result = sdiv <2 x i32> %num, <i32 1235195, i32 1235195>
595  ret <2 x i32> %result
596}
597
; Signed i32 division where the denominator is not a constant but the value
; (4096 << %y) computed from the second argument.
; Both RUN configurations share the common CHECK prefix, i.e. they are expected
; to emit identical code; the expected output starts with the v_lshl_b32 that
; forms the denominator.
598define i32 @v_sdiv_i32_pow2_shl_denom(i32 %x, i32 %y) {
599; CHECK-LABEL: v_sdiv_i32_pow2_shl_denom:
600; CHECK:       ; %bb.0:
601; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
602; CHECK-NEXT:    v_lshl_b32_e32 v1, 0x1000, v1
603; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
604; CHECK-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
605; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
606; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
607; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
608; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v3
609; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v1
610; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, 0, v1
611; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
612; CHECK-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
613; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
614; CHECK-NEXT:    v_mul_lo_u32 v5, v5, v4
615; CHECK-NEXT:    v_mul_hi_u32 v5, v4, v5
616; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
617; CHECK-NEXT:    v_mul_hi_u32 v4, v0, v4
618; CHECK-NEXT:    v_mul_lo_u32 v5, v4, v1
619; CHECK-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
620; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
621; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
622; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
623; CHECK-NEXT:    v_sub_i32_e64 v5, s[4:5], v0, v1
624; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
625; CHECK-NEXT:    v_add_i32_e32 v5, vcc, 1, v4
626; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
627; CHECK-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
628; CHECK-NEXT:    v_xor_b32_e32 v1, v2, v3
629; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
630; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
631; CHECK-NEXT:    s_setpc_b64 s[30:31]
632  %shl.y = shl i32 4096, %y
633  %r = sdiv i32 %x, %shl.y
634  ret i32 %r
635}
636
637define <2 x i32> @v_sdiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
638; GISEL-LABEL: v_sdiv_v2i32_pow2_shl_denom:
639; GISEL:       ; %bb.0:
640; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
641; GISEL-NEXT:    s_movk_i32 s4, 0x1000
642; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
643; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
644; GISEL-NEXT:    v_lshl_b32_e32 v2, s4, v2
645; GISEL-NEXT:    v_lshl_b32_e32 v3, s4, v3
646; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
647; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
648; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
649; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
650; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
651; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
652; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
653; GISEL-NEXT:    v_xor_b32_e32 v4, v4, v6
654; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
655; GISEL-NEXT:    v_xor_b32_e32 v5, v5, v7
656; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v6
657; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
658; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v2
659; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
660; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
661; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
662; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
663; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
664; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
665; GISEL-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
666; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
667; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
668; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
669; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v8
670; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
671; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
672; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
673; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
674; GISEL-NEXT:    v_mul_hi_u32 v6, v0, v6
675; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
676; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v2
677; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v6
678; GISEL-NEXT:    v_mul_lo_u32 v10, v7, v3
679; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v7
680; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
681; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
682; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
683; GISEL-NEXT:    v_cndmask_b32_e32 v6, v6, v9, vcc
684; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], v0, v2
685; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
686; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v11, s[4:5]
687; GISEL-NEXT:    v_sub_i32_e64 v9, s[6:7], v1, v3
688; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
689; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v6
690; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v9, s[4:5]
691; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v7
692; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
693; GISEL-NEXT:    v_cndmask_b32_e32 v0, v6, v8, vcc
694; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
695; GISEL-NEXT:    v_cndmask_b32_e32 v1, v7, v9, vcc
696; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
697; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
698; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
699; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
700; GISEL-NEXT:    s_setpc_b64 s[30:31]
701;
702; CGP-LABEL: v_sdiv_v2i32_pow2_shl_denom:
703; CGP:       ; %bb.0:
704; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
705; CGP-NEXT:    s_movk_i32 s4, 0x1000
706; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
707; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
708; CGP-NEXT:    v_lshl_b32_e32 v2, s4, v2
709; CGP-NEXT:    v_lshl_b32_e32 v3, s4, v3
710; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
711; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
712; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
713; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
714; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
715; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
716; CGP-NEXT:    v_xor_b32_e32 v4, v4, v6
717; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
718; CGP-NEXT:    v_mul_lo_u32 v8, v0, 0
719; CGP-NEXT:    v_xor_b32_e32 v5, v5, v7
720; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
721; CGP-NEXT:    v_mul_lo_u32 v9, v1, 0
722; CGP-NEXT:    v_xor_b32_e32 v2, v2, v6
723; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
724; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v2
725; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
726; CGP-NEXT:    v_cvt_f32_u32_e32 v10, v3
727; CGP-NEXT:    v_sub_i32_e32 v11, vcc, 0, v3
728; CGP-NEXT:    v_rcp_f32_e32 v6, v6
729; CGP-NEXT:    v_rcp_f32_e32 v10, v10
730; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
731; CGP-NEXT:    v_mul_f32_e32 v10, 0x4f7ffffe, v10
732; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
733; CGP-NEXT:    v_cvt_u32_f32_e32 v10, v10
734; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
735; CGP-NEXT:    v_mul_lo_u32 v12, v6, 0
736; CGP-NEXT:    v_mul_lo_u32 v11, v11, v10
737; CGP-NEXT:    v_mul_lo_u32 v13, v10, 0
738; CGP-NEXT:    v_mul_lo_u32 v14, 0, v7
739; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
740; CGP-NEXT:    v_mul_lo_u32 v15, 0, v11
741; CGP-NEXT:    v_mul_hi_u32 v11, v10, v11
742; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
743; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
744; CGP-NEXT:    v_add_i32_e32 v7, vcc, v12, v7
745; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
746; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
747; CGP-NEXT:    v_add_i32_e32 v7, vcc, v10, v11
748; CGP-NEXT:    v_mul_lo_u32 v10, 0, v6
749; CGP-NEXT:    v_mul_hi_u32 v6, v0, v6
750; CGP-NEXT:    v_mul_lo_u32 v11, 0, v7
751; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
752; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
753; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
754; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
755; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
756; CGP-NEXT:    v_mul_lo_u32 v8, v6, v2
757; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v6
758; CGP-NEXT:    v_mul_lo_u32 v10, v7, v3
759; CGP-NEXT:    v_add_i32_e32 v11, vcc, 1, v7
760; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
761; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
762; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
763; CGP-NEXT:    v_cndmask_b32_e32 v6, v6, v9, vcc
764; CGP-NEXT:    v_sub_i32_e64 v8, s[4:5], v0, v2
765; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
766; CGP-NEXT:    v_cndmask_b32_e64 v7, v7, v11, s[4:5]
767; CGP-NEXT:    v_sub_i32_e64 v9, s[6:7], v1, v3
768; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
769; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v6
770; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v9, s[4:5]
771; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v7
772; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
773; CGP-NEXT:    v_cndmask_b32_e32 v0, v6, v8, vcc
774; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
775; CGP-NEXT:    v_cndmask_b32_e32 v1, v7, v9, vcc
776; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
777; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
778; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
779; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
780; CGP-NEXT:    s_setpc_b64 s[30:31]
781  %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
782  %r = sdiv <2 x i32> %x, %shl.y
783  ret <2 x i32> %r
784}
785
; Scalar i32 signed division where both operands are first masked to their
; low 24 bits (and with 16777215, i.e. 0xffffff).
;
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
;
; What the two sets of assertions show:
;  * In the GISEL run (idiv expansion in AMDGPUCodeGenPrepare disabled) the
;    expected code still contains the sign handling around the unsigned
;    expansion - v_ashrrev_i32 by 31, add and xor on each operand, then xor and
;    sub on the quotient.
;  * In the CGP run (idiv expansion in AMDGPUCodeGenPrepare enabled) the
;    expected code has no sign-handling instructions at all; it returns right
;    after the second quotient correction. NOTE(review) presumably this is
;    because the masked operands are known to be non-negative - confirm
;    against AMDGPUCodeGenPrepare.
;  * The CGP run expects v_rcp_f32 where the GISEL run expects
;    v_rcp_iflag_f32, and additionally expects several v_mul_lo_u32 with a
;    literal 0 operand feeding adds.
786define i32 @v_sdiv_i32_24bit(i32 %num, i32 %den) {
787; GISEL-LABEL: v_sdiv_i32_24bit:
788; GISEL:       ; %bb.0:
789; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
790; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
791; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
792; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
793; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
794; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
795; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
796; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
797; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
798; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
799; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v1
800; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v1
801; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
802; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
803; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
804; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
805; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
806; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
807; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
808; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v1
809; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
810; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
811; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
812; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
813; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v0, v1
814; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
815; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v4
816; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
817; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
818; GISEL-NEXT:    v_xor_b32_e32 v1, v2, v3
819; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v1
820; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
821; GISEL-NEXT:    s_setpc_b64 s[30:31]
822;
823; CGP-LABEL: v_sdiv_i32_24bit:
824; CGP:       ; %bb.0:
825; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
826; CGP-NEXT:    s_mov_b32 s4, 0xffffff
827; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
828; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
829; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
830; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
831; CGP-NEXT:    v_mul_lo_u32 v4, v0, 0
832; CGP-NEXT:    v_rcp_f32_e32 v2, v2
833; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
834; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
835; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
836; CGP-NEXT:    v_mul_lo_u32 v5, v2, 0
837; CGP-NEXT:    v_mul_lo_u32 v6, 0, v3
838; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
839; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
840; CGP-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
841; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
842; CGP-NEXT:    v_mul_lo_u32 v3, 0, v2
843; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
844; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
845; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
846; CGP-NEXT:    v_mul_lo_u32 v3, v2, v1
847; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
848; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
849; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
850; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
851; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
852; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
853; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
854; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
855; CGP-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
856; CGP-NEXT:    s_setpc_b64 s[30:31]
; Keep only the low 24 bits of each operand (16777215 == 0xffffff), then
; perform the signed division on the masked values.
857  %num.mask = and i32 %num, 16777215
858  %den.mask = and i32 %den, 16777215
859  %result = sdiv i32 %num.mask, %den.mask
860  ret i32 %result
861}
862
; <2 x i32> signed division where every lane of both operands is first masked
; to its low 24 bits (and with a splat of 16777215, i.e. 0xffffff).
;
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
;
; What the two sets of assertions show:
;  * Both runs expect the two lanes to be expanded independently and
;    interleaved, with the numerator lanes in v0/v1 and the denominator lanes
;    in v2/v3 after masking.
;  * In the GISEL run (idiv expansion in AMDGPUCodeGenPrepare disabled) the
;    expected code still contains per-lane sign handling - v_ashrrev_i32 by 31,
;    add and xor on every operand lane, then xor and sub on each quotient.
;  * In the CGP run (idiv expansion in AMDGPUCodeGenPrepare enabled) the
;    expected code has no sign-handling instructions; it returns right after
;    the second quotient correction of each lane. NOTE(review) presumably
;    this is because the masked operands are known to be non-negative -
;    confirm against AMDGPUCodeGenPrepare.
;  * The CGP run expects v_rcp_f32 where the GISEL run expects
;    v_rcp_iflag_f32, and additionally expects several v_mul_lo_u32 with a
;    literal 0 operand feeding adds.
863define <2 x i32> @v_sdiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
864; GISEL-LABEL: v_sdiv_v2i32_24bit:
865; GISEL:       ; %bb.0:
866; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
867; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
868; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
869; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
870; GISEL-NEXT:    v_and_b32_e32 v2, s4, v2
871; GISEL-NEXT:    v_and_b32_e32 v3, s4, v3
872; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
873; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
874; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
875; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
876; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
877; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
878; GISEL-NEXT:    v_xor_b32_e32 v8, v4, v5
879; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
880; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
881; GISEL-NEXT:    v_xor_b32_e32 v9, v6, v7
882; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
883; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
884; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
885; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
886; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
887; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
888; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
889; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
890; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
891; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
892; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
893; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
894; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
895; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
896; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
897; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
898; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
899; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
900; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
901; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
902; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
903; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
904; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
905; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
906; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v3
907; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v5
908; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
909; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
910; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
911; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
912; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
913; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
914; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[4:5]
915; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
916; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
917; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
918; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
919; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
920; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
921; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
922; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
923; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
924; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v8
925; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v9
926; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
927; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
928; GISEL-NEXT:    s_setpc_b64 s[30:31]
929;
930; CGP-LABEL: v_sdiv_v2i32_24bit:
931; CGP:       ; %bb.0:
932; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
933; CGP-NEXT:    s_mov_b32 s4, 0xffffff
934; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
935; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
936; CGP-NEXT:    v_and_b32_e32 v2, s4, v2
937; CGP-NEXT:    v_and_b32_e32 v3, s4, v3
938; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
939; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
940; CGP-NEXT:    v_mul_lo_u32 v6, v0, 0
941; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
942; CGP-NEXT:    v_sub_i32_e32 v8, vcc, 0, v3
943; CGP-NEXT:    v_mul_lo_u32 v9, v1, 0
944; CGP-NEXT:    v_rcp_f32_e32 v4, v4
945; CGP-NEXT:    v_rcp_f32_e32 v7, v7
946; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
947; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
948; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
949; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
950; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
951; CGP-NEXT:    v_mul_lo_u32 v10, v4, 0
952; CGP-NEXT:    v_mul_lo_u32 v8, v8, v7
953; CGP-NEXT:    v_mul_lo_u32 v11, v7, 0
954; CGP-NEXT:    v_mul_lo_u32 v12, 0, v5
955; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
956; CGP-NEXT:    v_mul_lo_u32 v13, 0, v8
957; CGP-NEXT:    v_mul_hi_u32 v8, v7, v8
958; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
959; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
960; CGP-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
961; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
962; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
963; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v8
964; CGP-NEXT:    v_mul_lo_u32 v7, 0, v4
965; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
966; CGP-NEXT:    v_mul_lo_u32 v8, 0, v5
967; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
968; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
969; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
970; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
971; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
972; CGP-NEXT:    v_mul_lo_u32 v6, v4, v2
973; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
974; CGP-NEXT:    v_mul_lo_u32 v8, v5, v3
975; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
976; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
977; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
978; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
979; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
980; CGP-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
981; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
982; CGP-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
983; CGP-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
984; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
985; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
986; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
987; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
988; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
989; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
990; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
991; CGP-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
992; CGP-NEXT:    s_setpc_b64 s[30:31]
; Keep only the low 24 bits of every lane (16777215 == 0xffffff), then perform
; the lane-wise signed division on the masked vectors.
993  %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
994  %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
995  %result = sdiv <2 x i32> %num.mask, %den.mask
996  ret <2 x i32> %result
997}
998